    def initialize_from_data(self, reverse=False, smoother="lowess", force=False):
        import time

        # use the data in self.transformation_data to create the trafos
        for s_from, darr in self.transformation_data.items():
            self.transformations[s_from] = {}
            for s_to, data in darr.items():
                start = time.time()
                if self.getTransformedData(s_from, s_to) is not None:
                    sm = smoothing.SmoothingInterpolation()
                    sm.initialize(data[0],
                                  self.getTransformedData(s_from, s_to))
                    self._addTransformation(sm, s_from, s_to)
                    if reverse:
                        sm_rev = smoothing.SmoothingInterpolation()
                        sm_rev.initialize(
                            self.getTransformedData(s_from, s_to), data[0])
                        self._addTransformation(sm_rev, s_to, s_from)
                else:
                    sm = smoothing.getSmoothingObj(smoother)
                    sm.initialize(data[0], data[1])
                    self.transformations[s_from][s_to] = sm
                    if reverse:
                        sm_rev = smoothing.getSmoothingObj(smoother)
                        sm_rev.initialize(data[1], data[0])
                        self._addTransformation(sm_rev, s_to, s_from)
                print("Took %0.4fs to align %s against %s" %
                      (time.time() - start, s_to, s_from))
def addDataToTrafo(tr_data, run_0, run_1, spl_aligner, multipeptides,
                   realign_method, max_rt_diff, topN=5, sd_max_data_length=1000):
    id_0 = run_0.get_id()
    id_1 = run_1.get_id()

    if id_0 == id_1:
        null = smoothing.SmoothingNull()
        tr_data.addTrafo(id_0, id_1, null)
        tr_data.addTrafo(id_1, id_0, null)
        return

    # Data
    data_0, data_1 = spl_aligner._getRTData(run_0, run_1, multipeptides)
    tr_data.addData(id_0, data_0, id_1, data_1)

    # import pylab
    # pylab.scatter(data_0, data_1)
    # pylab.savefig('data_%s_%s.pdf' % (run_0, run_1) )
    # pylab.clf()
    # pylab.scatter(data_0, data_1)
    # pylab.xlim(2300, 2600)
    # pylab.ylim(2300, 2600)
    # pylab.savefig('data_%s_%s_zoom.pdf' % (run_0, run_1) )
    # pylab.clf()

    if len(data_0) == 0:
        null = smoothing.SmoothingNull()
        tr_data.addTrafo(id_0, id_1, null)
        tr_data.addTrafo(id_1, id_0, null)
        return

    # Smoothers
    sm_0_1 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff,
                                       min_rt_diff=0.1, removeOutliers=False,
                                       tmpdir=None)
    sm_1_0 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff,
                                       min_rt_diff=0.1, removeOutliers=False,
                                       tmpdir=None)
    # Initialize smoother
    sm_0_1.initialize(data_0, data_1)
    sm_1_0.initialize(data_1, data_0)

    # Compute error for alignment (standard deviation)
    stdev_0_1 = 0.0
    stdev_1_0 = 0.0
    if sd_max_data_length > 0:
        sample_idx = random.sample(range(len(data_0)),
                                   min(sd_max_data_length, len(data_0)))
        data_0_s = [data_0[i] for i in sample_idx]
        data_1_s = [data_1[i] for i in sample_idx]
        data0_aligned = sm_0_1.predict(data_0_s)
        stdev_0_1 = numpy.std(numpy.array(data_1_s) - numpy.array(data0_aligned))
        data1_aligned = sm_1_0.predict(data_1_s)
        stdev_1_0 = numpy.std(numpy.array(data_0_s) - numpy.array(data1_aligned))
        print("stdev for", id_0, id_1, stdev_0_1, " / ", stdev_1_0, "on data length", len(data_0_s))

    # Add data
    tr_data.addTrafo(id_0, id_1, sm_0_1, stdev_0_1)
    tr_data.addTrafo(id_1, id_0, sm_1_0, stdev_1_0)
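
For orientation, a minimal driver sketch for the function above; "runs", "tr_data", "spl_aligner" and "multipeptides" are placeholder names (not library API), and addDataToTrafo already registers both directions for each pair:

import itertools

# runs: hypothetical list of run objects exposing get_id()
for run_0, run_1 in itertools.combinations(runs, 2):
    addDataToTrafo(tr_data, run_0, run_1, spl_aligner, multipeptides,
                   realign_method="lowess", max_rt_diff=30)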
Example #4
    def test_smooth_nn(self):
        """Test the univariate spline using local kernel smoothing"""
        sm = smoothing.WeightedNearestNeighbour(3, 5, 0.5, False)
        sm.initialize(self.data1, self.data2)
        r = sm.predict(self.data1)
        self.assertEqual(len(r), 8)

        # This is slightly better than the NoCV variation
        expected = [
            5.2723735408560302, 7.5434782608695654, 8.6875, 9.5625, 10.75,
            15.6, 7.6671586996151504, 6.2922201138519931
        ]
        for a, b in zip(expected, r):
            self.assertAlmostEqual(a, b)

        sm = smoothing.WeightedNearestNeighbour(3, 5, 0.1, False)
        sm.initialize(self.data1, self.data2)
        r = sm.predict(self.data1)
        self.assertEqual(len(r), 8)

        # This is slightly better than the NoCV variation
        expected = [
            5.4637421665174575, 8.0223880597014929, 9.2750000000000004,
            9.78125, 10.750000000000002, 15.6, 8.1320351120742185,
            6.4280968201233994
        ]
        for a, b in zip(expected, r):
            self.assertAlmostEqual(a, b)
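
For intuition, a standalone sketch of the weighted nearest-neighbour idea these tests exercise: predict y at a query point as the average of the topN closest training points, weighted by 1 / max(|dx|, mindiff). This illustrates the technique under those assumptions; it is not the library's WeightedNearestNeighbour implementation.

def weighted_nn_predict(xs, ys, x_eval, topN=3, mindiff=0.5):
    # illustrative weighted nearest-neighbour smoother (sketch only)
    result = []
    for xe in x_eval:
        # the topN training points closest to the query
        nearest = sorted(zip(xs, ys), key=lambda p: abs(p[0] - xe))[:topN]
        # closer points weigh more; mindiff caps the weight of exact matches
        weights = [1.0 / max(abs(x - xe), mindiff) for x, _ in nearest]
        result.append(sum(w * y for w, (_, y) in zip(weights, nearest)) / sum(weights))
    return result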
    def test_gettingOperator_rpy2(self):
        """
        Test getting the correct smoothing operator
        """
        op = smoothing.get_smooting_operator()
        self.assertTrue(isinstance(op, smoothing.SmoothingR))

        op = smoothing.getSmoothingObj("splineR")
        self.assertTrue(isinstance(op, smoothing.SmoothingR))
    def test_gettingOperator(self):
        """
        Test getting the correct smoothing operator
        """
        op = smoothing.get_smooting_operator(use_linear=True)
        self.assertTrue(isinstance(op, smoothing.SmoothingLinear))

        op = smoothing.get_smooting_operator(use_scikit=True)
        self.assertTrue(isinstance(op, smoothing.SmoothingPy))

        op = smoothing.get_smooting_operator(use_external_r=True, tmpdir="tmp")
        self.assertTrue(isinstance(op, smoothing.SmoothingRExtern))
    def test_smooth_nn(self):
        """Test the univariate spline using local kernel smoothing"""

        # Test with regular parameters
        sm = smoothing.WeightedNearestNeighbour(2, 5, 0.5, False)
        sm.initialize(self.data1, self.data2)
        r = sm.predict([5])
        self.assertAlmostEqual(r[0], 4.85378590078)

        r = sm.predict([15])
        self.assertAlmostEqual(r[0], 14.472485768500951)

        # Test with exponent
        sm = smoothing.WeightedNearestNeighbour(2, 5, 0.5, False, exponent=2.0)
        sm.initialize(self.data1, self.data2)
        r = sm.predict([5])
        self.assertAlmostEqual(r[0], 4.4223582231809182)

        r = sm.predict([15])
        self.assertAlmostEqual(r[0], 14.04993649085635)

        sm = smoothing.WeightedNearestNeighbour(3, 5, 0.5, False)
        sm.initialize(self.data1, self.data2)
        r = sm.predict(self.data1)
        self.assertEqual(len(r), 8)

        # This is slightly better than the NoCV variation
        expected = [
            4.85378590078329, 7.3181818181818183, 8.6853448275862046,
            10.054730258014073, 11.044451654769629, 14.497816294331514,
            7.4375518352136076, 6.2364096080910238
        ]

        for a, b in zip(expected, r):
            self.assertAlmostEqual(a, b)

        # Try with smaller mindiff => more weight on close neighbors
        sm = smoothing.WeightedNearestNeighbour(3, 5, 0.1, False)
        sm.initialize(self.data1, self.data2)
        r = sm.predict(self.data1)
        self.assertEqual(len(r), 8)

        # This is slightly better than the NoCV variation
        expected = [
            4.3099526066350711, 7.0999999999999996, 8.8596153846153847,
            10.610377054463424, 11.015838150289017, 14.1233812970026,
            7.2064265084789056, 6.3871142393069835
        ]
        for a, b in zip(expected, r):
            self.assertAlmostEqual(a, b)
    def test_gettingOperator_obj(self):
        """
        Test getting the correct smoothing operator (new interface)
        """

        op = smoothing.getSmoothingObj("diRT")
        self.assertTrue(isinstance(op, smoothing.SmoothingNull))

        op = smoothing.getSmoothingObj("None")
        self.assertTrue(isinstance(op, smoothing.SmoothingNull))

        op = smoothing.getSmoothingObj("linear")
        self.assertTrue(isinstance(op, smoothing.SmoothingLinear))

        op = smoothing.getSmoothingObj("splineR")
        self.assertTrue(isinstance(op, smoothing.SmoothingR))
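
Behind these assertions sits a name-to-class factory. A minimal sketch of such a dispatch follows; the mapping is inferred from the assertions above and is an assumption about, not a copy of, the actual getSmoothingObj body:

def get_smoothing_obj_sketch(name):
    # hypothetical dispatch table, inferred from the tests above
    dispatch = {
        "diRT": smoothing.SmoothingNull,
        "None": smoothing.SmoothingNull,
        "linear": smoothing.SmoothingLinear,
        "splineR": smoothing.SmoothingR,
    }
    return dispatch[name]()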
Example #11
    def test_smooth_spline_scipy_uni(self):
        """Test the univariate spline using spline (no crossvalidation)"""
        sm = smoothing.UnivarSplineNoCV()
        sm.initialize(self.data1, self.data2)
        r = sm.predict(self.data1)
        self.assertEqual(len(r), 8)

        expected = [
            4.1457135130652265, 7.442333705513172, 8.9177273726462474,
            10.248891199849604, 11.414111021721407, 13.991054262042756,
            7.5957445613093642, 5.8444243638522186
        ]
        self.assertEqual(len(r), 8)
        for a, b in zip(expected, r):
            self.assertAlmostEqual(a, b)

        r = sm.predict(self.data2)
        expected = [
            2.34266247, 7.2926131, 10.48943975, 11.85840597, 11.85840597,
            13.48225519, 7.44184246, 6.61579704
        ]

        self.assertEqual(len(r), 8)
        for a, b in zip(expected, r):
            self.assertTrue(abs(1 - a / b) < 0.05)
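
UnivarSplineNoCV presumably wraps a SciPy univariate spline. A minimal sketch of that underlying call (an assumption about the internals; note that scipy.interpolate.UnivariateSpline requires increasing x values, hence the sort):

import numpy
from scipy.interpolate import UnivariateSpline

def spline_predict(xs, ys, x_eval, s=None):
    # sort by x, as UnivariateSpline requires increasing x values
    order = numpy.argsort(xs)
    spl = UnivariateSpline(numpy.array(xs)[order], numpy.array(ys)[order], s=s)
    return spl(x_eval)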
    def _spline_align_runs(self, bestrun, run, multipeptides):
        """Will align run against bestrun"""

        sm = smoothing.getSmoothingObj(smoother=self.smoother,
                                       tmpdir=self.tmpdir_)

        # get those peptides we want to use for alignment => for this use the mapping
        # data1 = reference data (master)
        # data2 = data to be aligned (slave)
        data1, data2 = self._getRTData(bestrun, run, multipeptides)

        if len(data2) < 2:
            print "No common identifications between %s and %s. Only found %s features below a cutoff of %s" % (
                run.get_id(), bestrun.get_id(), len(data1),
                self.alignment_fdr_threshold_)
            print "If you ran the feature_alignment.py script, try to skip the re-alignment step (e.g. remove the --realign_runs option)."
            raise Exception("Not enough datapoints (less than 2 datapoints).")

        # Since we want to predict how to convert from slave to master, slave
        # is first and master is second.
        sm.initialize(data2, data1)
        data2_aligned = sm.predict(data2)

        # Store transformation in collection (from run to bestrun)
        self.transformation_collection.addTransformationData([data2, data1],
                                                             run.get_id(),
                                                             bestrun.get_id())
        self.transformation_collection.addTransformedData(
            data2_aligned, run.get_id(), bestrun.get_id())

        stdev = numpy.std(numpy.array(data1) - numpy.array(data2_aligned))
        median = numpy.median(numpy.array(data1) - numpy.array(data2_aligned))
        print "Will align run %s against %s, using %s features" % (
            run.get_id(), bestrun.get_id(), len(data1))
        print "  Computed stdev", stdev, "and median", median

        # Store error for later
        d = self.transformation_error.transformations.get(run.get_id(), {})
        d[bestrun.get_id()] = [stdev, median]
        self.transformation_error.transformations[run.get_id()] = d

        # Now predict on _all_ data and write this back to the data
        i = 0
        all_pg = []
        for prgr in run:
            for pep in prgr:
                all_pg.extend([(pg.get_normalized_retentiontime(),
                                pg.get_feature_id())
                               for pg in pep.get_all_peakgroups()])
        rt_eval = [pg[0] for pg in all_pg]
        aligned_result = sm.predict(rt_eval)
        for prgr in run:
            for pep in prgr:
                # TODO hack -> direct access to the internal peakgroups object
                mutable = [list(pg) for pg in pep.peakgroups_]
                for k in range(len(mutable)):
                    mutable[k][2] = aligned_result[i]
                    i += 1
                pep.peakgroups_ = [tuple(m) for m in mutable]
Example #13
    def test_duplication(self):
        """
        Test de-duplication of array data
        """
        arr = [0, 0, 5, 6, 6, 7, 8, 8]
        sm = smoothing.SmoothingPy()
        de_dupl, duplications = sm.de_duplicate_array(arr)
        re_dupl = sm.re_duplicate_array(de_dupl, duplications)
        # the input and output need to be identical!
        self.assertEqual(re_dupl, arr)
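
The round trip asserted here is easy to picture with a small standalone sketch: collapse consecutive duplicates while recording repeat counts, then expand again. This illustrates the contract, not the SmoothingPy internals:

def de_duplicate(arr):
    # collapse consecutive duplicates, remembering each value's repeat count
    values, counts = [], []
    for v in arr:
        if values and values[-1] == v:
            counts[-1] += 1
        else:
            values.append(v)
            counts.append(1)
    return values, counts

def re_duplicate(values, counts):
    # expand back to the original array
    return [v for v, c in zip(values, counts) for _ in range(c)]

assert re_duplicate(*de_duplicate([0, 0, 5, 6, 6, 7, 8, 8])) == [0, 0, 5, 6, 6, 7, 8, 8]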
    def getTransformation(self, s_from, s_to):

        if s_from == s_to:
            # null smoothing
            return smoothing.SmoothingNull()

        try:
            return self.transformations[s_from][s_to]
        except KeyError:
            return None
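
Usage is straightforward: asking for a run against itself yields an identity (null) smoother, and an unknown pair yields None. A hypothetical call pattern, where "collection", the run ids and rt_values are placeholders:

trafo = collection.getTransformation("run_1", "run_2")  # hypothetical collection/ids
if trafo is not None:
    aligned = trafo.predict(rt_values)  # rt_values: retention times to map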
Example #15
    def test_smooth_spline_r_extern(self):
        """Test the smoothing spline using external R"""
        sm = smoothing.SmoothingRExtern()
        sm.initialize(self.data1, self.data2)
        r = sm.predict(self.data2)
        expected = [
            2.34266247, 7.2926131, 10.48943975, 11.85840597, 11.85840597,
            13.48225519, 7.44184246, 6.61579704
        ]

        self.assertEqual(len(r), 8)
        for a, b in zip(expected, r):
            self.assertAlmostEqual(a, b)
Example #17
    def test_smooth_lowess(self):
        """Test the lowess smoothing"""
        sm = smoothing.LowessSmoothingBiostats()
        sm.initialize(self.data1, self.data2)
        r = sm.predict(self.data1)
        self.assertEqual(len(r), 8)

        # This is slightly better than the NoCV variation
        expected = [
            4.2123769729879061, 7.3305230831706876, 8.8162015867770727,
            10.144542883530072, 11.507036080352814, 14.061195393451431,
            7.4880128821482463, 5.7476045207327786
        ]
        for a, b in zip(expected, r):
            self.assertAlmostEqual(a, b)
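
LowessSmoothingBiostats presumably implements LOWESS (locally weighted scatterplot smoothing). A minimal sketch of the same idea using statsmodels, offered as an equivalent technique rather than the library's own implementation:

import numpy
from statsmodels.nonparametric.smoothers_lowess import lowess

def lowess_predict(xs, ys, x_eval, frac=0.66):
    # lowess returns (x, yhat) pairs sorted by x; interpolate at x_eval
    fitted = lowess(numpy.array(ys), numpy.array(xs), frac=frac)
    return numpy.interp(x_eval, fitted[:, 0], fitted[:, 1])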
Example #18
    def test_smooth_spline_scipy_cv(self):
        """Test the univariate spline using spline (with crossvalidation)"""
        sm = smoothing.UnivarSplineCV()
        sm.initialize(self.data1, self.data2)
        r = sm.predict(self.data1)
        self.assertEqual(len(r), 8)

        # This is slightly better than the NoCV variation
        expected = [
            4.1626385843797094, 7.3804099239612029, 8.9667396489152544,
            10.384851777100122, 11.316311505414465, 13.994282700490476,
            7.5367306411050095, 5.8580352186337672
        ]
        for a, b in zip(expected, r):
            self.assertTrue(abs(1.0 - a / b) < 0.1)
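
The CV variant differs from the NoCV one only in how the smoothing factor is chosen. A rough sketch of picking the UnivariateSpline smoothing factor s by hold-out error; the actual UnivarSplineCV procedure may well differ:

import numpy
from scipy.interpolate import UnivariateSpline

def pick_smoothing_factor(xs, ys, candidates=(0.1, 1.0, 10.0, 100.0)):
    # hold out every third point and keep the s with the lowest squared error
    xs, ys = numpy.array(xs, dtype=float), numpy.array(ys, dtype=float)
    order = numpy.argsort(xs)
    xs, ys = xs[order], ys[order]
    train = numpy.arange(len(xs)) % 3 != 0
    best_s, best_err = None, numpy.inf
    for s in candidates:
        spl = UnivariateSpline(xs[train], ys[train], s=s)
        err = numpy.mean((spl(xs[~train]) - ys[~train]) ** 2)
        if err < best_err:
            best_s, best_err = s, err
    return best_s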
Example #19
    def test_smooth_spline_scikit(self):
        """Test the smoothing spline using scikit"""
        sm = smoothing.SmoothingPy()
        r = sm._smooth_spline_scikit(self.data1, self.data2)

        self.assertEqual(len(r), 8)
        self.assertAlmostEqual(r[0], 4.41118020)
        self.assertAlmostEqual(r[2], 8.7900826361)
        self.assertAlmostEqual(r[5], 14.1276411901)
        self.assertAlmostEqual(r[7], 5.90758396)

        r = sm._smooth_spline_scikit(self.data1, self.data2, [5, 7, 10.0])
        self.assertEqual(len(r), 3)
        self.assertAlmostEqual(r[0], 4.6129201633)
        self.assertAlmostEqual(r[1], 7.73837621136)
        self.assertAlmostEqual(r[2], 10.3726686328)

        r = sm._smooth_spline_scikit(self.data1, self.data2, [10.0, 5.0, 7])
        self.assertEqual(len(r), 3)
        self.assertAlmostEqual(r[0], 10.3726686328)
        self.assertAlmostEqual(r[1], 4.6129201633)
        self.assertAlmostEqual(r[2], 7.73837621136)

        # Since (xhat, r) is optimized jointly, different lambda values are
        # estimated and a different estimate is used here...
        r = sm._smooth_spline_scikit(self.data1, self.data2,
                                     [10.0, 5.0, 7, 10.00001])
        self.assertEqual(len(r), 4)
        self.assertAlmostEqual(r[0], 11.60276344181)
        self.assertAlmostEqual(r[1], 4.3279294106253)
        self.assertAlmostEqual(r[2], 7.3603529478143)
        self.assertAlmostEqual(r[3], 11.60276823628391)

        r = sm._smooth_spline_scikit(self.data1, self.data2,
                                     [10.0, 5.0, 7, 10.0], True)
        self.assertEqual(len(r), 4)
        expected = [
            10.372668632871067, 4.6129201633602159, 7.738376211369804,
            10.372668632871067
        ]
        for res, exp in zip(r, expected):
            self.assertAlmostEqual(res, exp)
def addDataToTrafo(tr_data, run_0, run_1, spl_aligner, multipeptides,
                   realign_method, max_rt_diff, topN=5, sd_max_data_length=5000, force=False):
    id_0 = run_0.get_id()
    id_1 = run_1.get_id()

    if id_0 == id_1:
        null = smoothing.SmoothingNull()
        tr_data.addTrafo(id_0, id_1, null)
        tr_data.addTrafo(id_1, id_0, null)
        return

    # Data
    data_0, data_1 = spl_aligner._getRTData(run_0, run_1, multipeptides)
    tr_data.addData(id_0, data_0, id_1, data_1)

    # import pylab
    # pylab.scatter(data_0, data_1)
    # pylab.savefig('data_%s_%s.pdf' % (run_0, run_1) )
    # pylab.clf()
    # pylab.scatter(data_0, data_1)
    # pylab.xlim(2300, 2600)
    # pylab.ylim(2300, 2600)
    # pylab.savefig('data_%s_%s_zoom.pdf' % (run_0, run_1) )
    # pylab.clf()

    if len(data_0) == 0:
        print("Warning, zero data! Consider increasing the anchor point cutoff (--alignment_score) to include more peptides.")
        if force:
            null = smoothing.SmoothingNull()
            tr_data.addTrafo(id_0, id_1, null)
            tr_data.addTrafo(id_1, id_0, null)
            return
        else:
            raise Exception("No data available for alignment %s vs %s" % (id_0, id_1) )

    # Smoothers
    sm_0_1 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff,
                                       min_rt_diff=0.1, removeOutliers=False,
                                       tmpdir=None)
    sm_1_0 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff,
                                       min_rt_diff=0.1, removeOutliers=False,
                                       tmpdir=None)
    # Initialize smoother
    sm_0_1.initialize(data_0, data_1)
    sm_1_0.initialize(data_1, data_0)

    # Compute error for alignment (standard deviation)
    stdev_0_1 = 0.0
    stdev_1_0 = 0.0
    if sd_max_data_length > 0:
        sample_idx = random.sample(range(len(data_0)),
                                   min(sd_max_data_length, len(data_0)))
        data_0_s = [data_0[i] for i in sample_idx]
        data_1_s = [data_1[i] for i in sample_idx]
        data0_aligned = sm_0_1.predict(data_0_s)
        stdev_0_1 = numpy.std(numpy.array(data_1_s) - numpy.array(data0_aligned))
        data1_aligned = sm_1_0.predict(data_1_s)
        stdev_1_0 = numpy.std(numpy.array(data_0_s) - numpy.array(data1_aligned))
        print("stdev for", id_0, id_1, stdev_0_1, " / ", stdev_1_0, "on data length", len(data_0_s))

    # Add data and trafo description.
    # The CyLightTransformationData actually requires to get a specific type of
    # transformation, the CyLinearInterpolateWrapper which may not be directly
    # passed to this function. We will try to recover the underlying linear
    # wrapper and then stick it into the tr_data object. If this fails, we just
    # revert to the regular behavior.
    try:
        sm_0_1_lwp = sm_0_1.internal_interpolation.getLWP()
        sm_1_0_lwp = sm_1_0.internal_interpolation.getLWP()
        tr_data.addTrafo(id_0, id_1, sm_0_1_lwp, stdev_0_1)
        tr_data.addTrafo(id_1, id_0, sm_1_0_lwp, stdev_1_0)
    except Exception:
        tr_data.addTrafo(id_0, id_1, sm_0_1, stdev_0_1)
        tr_data.addTrafo(id_1, id_0, sm_1_0, stdev_1_0)
Example #23
    def test_smooth_spline_scikit_wrap(self):
        """Test the smoothing spline using scikit and the wrapper"""
        sm = smoothing.SmoothingPy()
        import numpy
        sm.initialize(self.data1,
                      self.data2,
                      xmin=numpy.min(numpy.array(self.data1)),
                      xmax=numpy.max(numpy.array(self.data1)))
        r_pred = sm.predict(self.data1)
        r = sm._smooth_scikit_legacy(self.data1, self.data2, self.data1)
        self.assertEqual(len(r), 8)
        expected = [
            4.340432925607892, 7.412884078435387, 8.865581929053054,
            10.157652480992748, 11.196560644581082, 14.158931578788266,
            7.561207213410677, 5.90694677222806
        ]
        for res, exp in zip(r, expected):
            self.assertAlmostEqual(res, exp)
        for res, exp in zip(r_pred, expected):
            self.assertAlmostEqual(res, exp)

        r = sm._smooth_scikit_legacy(self.data1, self.data2, [5, 7, 10.0])
        self.assertEqual(len(r), 3)
        expected = [4.307319862409416, 7.423679061650855, 11.15630836580813]
        for res, exp in zip(r, expected):
            self.assertAlmostEqual(res, exp)

        r = sm._smooth_scikit_legacy(self.data1, self.data2, [10.0, 5.0, 7])
        self.assertEqual(len(r), 3)
        expected = [11.15630836580813, 4.307319862409416, 7.423679061650855]
        for res, exp in zip(r, expected):
            self.assertAlmostEqual(res, exp)

        # Here, we expect the exact same results
        r = sm._smooth_scikit_legacy(self.data1, self.data2,
                                     [10.0, 5.0, 7, 10.0])
        self.assertEqual(len(r), 4)
        expected = [
            11.15630836580813, 4.307319862409416, 7.423679061650855,
            11.15630836580813
        ]
        for res, exp in zip(r, expected):
            self.assertAlmostEqual(res, exp)

        # However, if we also evaluate at a point outside the previous window, we expect the results to change slightly
        r = sm._smooth_scikit_legacy(self.data1, self.data2,
                                     [10.0, 5.0, 7, 10.0, 15.0])
        self.assertEqual(len(r), 5)
        expected = [
            11.196560644581082, 4.340432925607892, 7.412884078435387,
            11.196560644581082, 14.158931578788266
        ]
        for res, exp in zip(r, expected):
            self.assertAlmostEqual(res, exp)

        # If we choose a point that is very far away, we still expect a "reasonable" result (e.g. -100 becomes -96)
        r = sm._smooth_scikit_legacy(self.data1, self.data2,
                                     [10.0, 5.0, 7, -100.0, 15.0])
        self.assertEqual(len(r), 5)
        expected = [
            10.265638884711272, 5.411029040286351, 7.352910860216361,
            -96.53810165019972, 15.119857967104132
        ]
        for res, exp in zip(r, expected):
            self.assertAlmostEqual(res, exp, 1)