def initialize_from_data(self, reverse=False, smoother="lowess", force=False):
    """Create the transformations from ``self.transformation_data``.

    For every ``s_from -> s_to`` entry one smoother is built and stored:

    * if ``self.getTransformedData(s_from, s_to)`` is not ``None``, a
      ``smoothing.SmoothingInterpolation`` is initialized with ``data[0]``
      and that transformed data and registered via ``_addTransformation``;
    * otherwise ``smoothing.getSmoothingObj(smoother)`` is initialized with
      ``data[0]`` and ``data[1]`` and stored directly in
      ``self.transformations[s_from][s_to]``.

    Args:
        reverse: when true, the inverse (``s_to -> s_from``) smoother is
            built as well and registered via ``_addTransformation``.
        smoother: name handed to ``smoothing.getSmoothingObj`` when no
            transformed data is available for a pair.
        force: not used by this method.

    Note:
        ``self.transformations[s_from]`` is replaced by an empty dict for
        every source key before it is filled again.
    """
    # Imported locally; only needed for the timing output below.
    import time

    for s_from, darr in self.transformation_data.items():
        self.transformations[s_from] = {}
        for s_to, data in darr.items():
            start = time.time()
            # Look the transformed data up once and reuse it below.
            transformed = self.getTransformedData(s_from, s_to)
            if transformed is not None:
                sm = smoothing.SmoothingInterpolation()
                sm.initialize(data[0], transformed)
                self._addTransformation(sm, s_from, s_to)
                if reverse:
                    sm_rev = smoothing.SmoothingInterpolation()
                    sm_rev.initialize(transformed, data[0])
                    self._addTransformation(sm_rev, s_to, s_from)
            else:
                sm = smoothing.getSmoothingObj(smoother)
                sm.initialize(data[0], data[1])
                self.transformations[s_from][s_to] = sm
                if reverse:
                    sm_rev = smoothing.getSmoothingObj(smoother)
                    sm_rev.initialize(data[1], data[0])
                    self._addTransformation(sm_rev, s_to, s_from)
            print("Took %0.4fs to align %s against %s" % (time.time() - start, s_to, s_from))
def addDataToTrafo(tr_data, run_0, run_1, spl_aligner, multipeptides,
                   realign_method, max_rt_diff, topN=5, sd_max_data_length=1000):
    """Compute the transformations between two runs and store them in tr_data.

    Both directions (``run_0 -> run_1`` and ``run_1 -> run_0``) are added via
    ``tr_data.addTrafo``. When both runs have the same id, or when no data
    is returned for the pair, a ``smoothing.SmoothingNull`` is registered
    for both directions instead.

    Args:
        tr_data: container receiving the ``addData`` / ``addTrafo`` calls.
        run_0: first run; must provide ``get_id()``.
        run_1: second run; must provide ``get_id()``.
        spl_aligner: object whose ``_getRTData(run_0, run_1, multipeptides)``
            returns the two data sequences used to fit the smoothers.
        multipeptides: passed through to ``spl_aligner._getRTData``.
        realign_method: smoother name passed to ``smoothing.getSmoothingObj``.
        max_rt_diff: passed to ``smoothing.getSmoothingObj``.
        topN: passed to ``smoothing.getSmoothingObj``.
        sd_max_data_length: maximal number of randomly sampled data points
            used to estimate the standard deviation of the fit; if it is
            not positive, the estimate is skipped and 0.0 is stored.
    """
    id_0 = run_0.get_id()
    id_1 = run_1.get_id()

    if id_0 == id_1:
        null = smoothing.SmoothingNull()
        tr_data.addTrafo(id_0, id_1, null)
        tr_data.addTrafo(id_1, id_0, null)
        return

    # Data
    data_0, data_1 = spl_aligner._getRTData(run_0, run_1, multipeptides)
    tr_data.addData(id_0, data_0, id_1, data_1)

    if len(data_0) == 0:
        null = smoothing.SmoothingNull()
        tr_data.addTrafo(id_0, id_1, null)
        tr_data.addTrafo(id_1, id_0, null)
        return

    # Smoothers (one per direction)
    sm_0_1 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff, min_rt_diff=0.1,
                                       removeOutliers=False, tmpdir=None)
    sm_1_0 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff, min_rt_diff=0.1,
                                       removeOutliers=False, tmpdir=None)

    # Initialize smoother
    sm_0_1.initialize(data_0, data_1)
    sm_1_0.initialize(data_1, data_0)

    # Compute error for alignment (standard deviation)
    stdev_0_1 = 0.0
    stdev_1_0 = 0.0
    if sd_max_data_length > 0:
        # range() instead of xrange() so the code also runs on Python 3.
        sample_idx = random.sample(range(len(data_0)),
                                   min(sd_max_data_length, len(data_0)))
        data_0_s = [data_0[i] for i in sample_idx]
        data_1_s = [data_1[i] for i in sample_idx]
        data0_aligned = sm_0_1.predict(data_0_s)
        stdev_0_1 = numpy.std(numpy.array(data_1_s) - numpy.array(data0_aligned))
        data1_aligned = sm_1_0.predict(data_1_s)
        stdev_1_0 = numpy.std(numpy.array(data_0_s) - numpy.array(data1_aligned))
        print("stdev for", id_0, id_1, stdev_0_1, " / ", stdev_1_0, "on data length", len(data_0_s))

    # Add data
    tr_data.addTrafo(id_0, id_1, sm_0_1, stdev_0_1)
    tr_data.addTrafo(id_1, id_0, sm_1_0, stdev_1_0)
def initialize_from_data(self, reverse=False, smoother="lowess", force=False):
    """Use the data in ``self.transformation_data`` to create the trafos.

    Each ``s_from -> s_to`` pair gets one smoother:

    * a ``smoothing.SmoothingInterpolation`` fitted on ``data[0]`` and the
      result of ``self.getTransformedData(s_from, s_to)`` when that result
      is not ``None`` (registered through ``_addTransformation``);
    * otherwise the smoother returned by
      ``smoothing.getSmoothingObj(smoother)``, fitted on ``data[0]`` and
      ``data[1]`` and written to ``self.transformations[s_from][s_to]``.

    Args:
        reverse: also build the inverse smoother (``s_to -> s_from``) for
            each pair and register it through ``_addTransformation``.
        smoother: smoother name used when no transformed data exists.
        force: not used by this method.

    Note:
        The entry ``self.transformations[s_from]`` is reset to ``{}`` for
        each source key that is processed.
    """
    # Imported locally; only the timing message needs it.
    import time

    for s_from, darr in self.transformation_data.items():
        self.transformations[s_from] = {}
        for s_to, data in darr.items():
            start = time.time()
            # Single lookup, reused for the forward and the reverse fit.
            transformed = self.getTransformedData(s_from, s_to)
            if transformed is not None:
                sm = smoothing.SmoothingInterpolation()
                sm.initialize(data[0], transformed)
                self._addTransformation(sm, s_from, s_to)
                if reverse:
                    sm_rev = smoothing.SmoothingInterpolation()
                    sm_rev.initialize(transformed, data[0])
                    self._addTransformation(sm_rev, s_to, s_from)
            else:
                sm = smoothing.getSmoothingObj(smoother)
                sm.initialize(data[0], data[1])
                self.transformations[s_from][s_to] = sm
                if reverse:
                    sm_rev = smoothing.getSmoothingObj(smoother)
                    sm_rev.initialize(data[1], data[0])
                    self._addTransformation(sm_rev, s_to, s_from)
            print("Took %0.4fs to align %s against %s" % (time.time() - start, s_to, s_from))
def test_smooth_nn(self):
    """Check WeightedNearestNeighbour predictions on the fixture data."""
    # Each case: (third constructor argument, expected predictions).
    # NOTE(review): the third argument is presumably the minimal RT
    # difference -- confirm against WeightedNearestNeighbour.
    cases = [
        (0.5, [
            5.2723735408560302, 7.5434782608695654, 8.6875, 9.5625,
            10.75, 15.6, 7.6671586996151504, 6.2922201138519931,
        ]),
        (0.1, [
            5.4637421665174575, 8.0223880597014929, 9.2750000000000004,
            9.78125, 10.750000000000002, 15.6, 8.1320351120742185,
            6.4280968201233994,
        ]),
    ]
    for third_arg, wanted in cases:
        smoother = smoothing.WeightedNearestNeighbour(3, 5, third_arg, False)
        smoother.initialize(self.data1, self.data2)
        predicted = smoother.predict(self.data1)
        self.assertEqual(len(predicted), 8)
        for want, got in zip(wanted, predicted):
            self.assertAlmostEqual(want, got)
def test_gettingOperator_rpy2(self):
    """Test getting the correct smoothing operator.

    Both the default call of ``get_smooting_operator`` and
    ``getSmoothingObj("splineR")`` must return a ``SmoothingR``.
    """
    # assertIsInstance reports the offending object and the expected type
    # on failure, unlike assertTrue(isinstance(...)).
    op = smoothing.get_smooting_operator()
    self.assertIsInstance(op, smoothing.SmoothingR)

    op = smoothing.getSmoothingObj("splineR")
    self.assertIsInstance(op, smoothing.SmoothingR)
def test_gettingOperator(self):
    """Test getting the correct smoothing operator.

    Each keyword flag of ``get_smooting_operator`` must select the
    matching smoother class.
    """
    # assertIsInstance gives a descriptive message when the type is wrong.
    op = smoothing.get_smooting_operator(use_linear=True)
    self.assertIsInstance(op, smoothing.SmoothingLinear)

    op = smoothing.get_smooting_operator(use_scikit=True)
    self.assertIsInstance(op, smoothing.SmoothingPy)

    op = smoothing.get_smooting_operator(use_external_r=True, tmpdir="tmp")
    self.assertIsInstance(op, smoothing.SmoothingRExtern)
def test_gettingOperator_obj(self):
    """Test getting the correct smoothing operator (new interface).

    ``getSmoothingObj`` must map "diRT" and "None" to ``SmoothingNull`` and
    "linear" to ``SmoothingLinear``.
    """
    # assertIsInstance gives a descriptive message when the type is wrong.
    op = smoothing.getSmoothingObj("diRT")
    self.assertIsInstance(op, smoothing.SmoothingNull)

    op = smoothing.getSmoothingObj("None")
    self.assertIsInstance(op, smoothing.SmoothingNull)

    op = smoothing.getSmoothingObj("linear")
    self.assertIsInstance(op, smoothing.SmoothingLinear)
def test_smooth_nn(self):
    """Check WeightedNearestNeighbour on single points and on the fixture."""

    def fitted(*args, **kwargs):
        # Build a smoother with the given arguments and fit it on the fixture.
        smoother = smoothing.WeightedNearestNeighbour(*args, **kwargs)
        smoother.initialize(self.data1, self.data2)
        return smoother

    # Test with regular parameters
    nn = fitted(2, 5, 0.5, False)
    prediction = nn.predict([5])
    self.assertAlmostEqual(prediction[0], 4.85378590078)
    prediction = nn.predict([15])
    self.assertAlmostEqual(prediction[0], 14.472485768500951)

    # Test with exponent
    nn = fitted(2, 5, 0.5, False, exponent=2.0)
    prediction = nn.predict([5])
    self.assertAlmostEqual(prediction[0], 4.4223582231809182)
    prediction = nn.predict([15])
    self.assertAlmostEqual(prediction[0], 14.04993649085635)

    # Full fixture; the second case uses a smaller mindiff
    # => more weight on close neighbors.
    fixture_cases = [
        (0.5, [
            4.85378590078329, 7.3181818181818183, 8.6853448275862046,
            10.054730258014073, 11.044451654769629, 14.497816294331514,
            7.4375518352136076, 6.2364096080910238,
        ]),
        (0.1, [
            4.3099526066350711, 7.0999999999999996, 8.8596153846153847,
            10.610377054463424, 11.015838150289017, 14.1233812970026,
            7.2064265084789056, 6.3871142393069835,
        ]),
    ]
    for third_arg, wanted in fixture_cases:
        nn = fitted(3, 5, third_arg, False)
        predicted = nn.predict(self.data1)
        self.assertEqual(len(predicted), 8)
        for want, got in zip(wanted, predicted):
            self.assertAlmostEqual(want, got)
def test_gettingOperator_obj(self):
    """Test getting the correct smoothing operator (new interface).

    ``getSmoothingObj`` must map "diRT" and "None" to ``SmoothingNull``,
    "linear" to ``SmoothingLinear`` and "splineR" to ``SmoothingR``.
    """
    # assertIsInstance gives a descriptive message when the type is wrong.
    op = smoothing.getSmoothingObj("diRT")
    self.assertIsInstance(op, smoothing.SmoothingNull)

    op = smoothing.getSmoothingObj("None")
    self.assertIsInstance(op, smoothing.SmoothingNull)

    op = smoothing.getSmoothingObj("linear")
    self.assertIsInstance(op, smoothing.SmoothingLinear)

    op = smoothing.getSmoothingObj("splineR")
    self.assertIsInstance(op, smoothing.SmoothingR)
def test_smooth_spline_scipy_uni(self):
    """Test UnivarSplineNoCV (univariate spline without crossvalidation)."""
    spline = smoothing.UnivarSplineNoCV()
    spline.initialize(self.data1, self.data2)

    # Predicting on the x values the spline was fitted on.
    on_data1 = spline.predict(self.data1)
    self.assertEqual(len(on_data1), 8)
    wanted_data1 = [
        4.1457135130652265, 7.442333705513172, 8.9177273726462474,
        10.248891199849604, 11.414111021721407, 13.991054262042756,
        7.5957445613093642, 5.8444243638522186,
    ]
    self.assertEqual(len(on_data1), 8)
    for want, got in zip(wanted_data1, on_data1):
        self.assertAlmostEqual(want, got)

    # Predicting on data2 is only compared with a 5% relative tolerance.
    on_data2 = spline.predict(self.data2)
    wanted_data2 = [
        2.34266247, 7.2926131, 10.48943975, 11.85840597,
        11.85840597, 13.48225519, 7.44184246, 6.61579704,
    ]
    self.assertEqual(len(on_data2), 8)
    for want, got in zip(wanted_data2, on_data2):
        relative_error = abs(1 - want / got)
        self.assertTrue(relative_error < 0.05)
def _spline_align_runs(self, bestrun, run, multipeptides):
    """Will align run against bestrun.

    A smoother is fitted that maps retention times of ``run`` (slave) onto
    ``bestrun`` (master). The fit data and the aligned data are stored in
    ``self.transformation_collection``, the standard deviation and median
    of the residuals in ``self.transformation_error``, and finally the
    smoother is applied to all peakgroups of ``run``.

    Args:
        bestrun: reference run (master).
        run: run to be aligned (slave); its peakgroups are modified.
        multipeptides: passed through to ``self._getRTData``.

    Raises:
        Exception: if fewer than two data points are available.
    """
    sm = smoothing.getSmoothingObj(smoother=self.smoother, tmpdir=self.tmpdir_)

    # get those peptides we want to use for alignment => for this use the mapping
    # data1 = reference data (master)
    # data2 = data to be aligned (slave)
    data1, data2 = self._getRTData(bestrun, run, multipeptides)

    # The messages are pre-formatted into one string each so that print()
    # emits the same text on Python 2 and Python 3.
    if len(data2) < 2:
        print("No common identifications between %s and %s. Only found %s features below a cutoff of %s" % (
            run.get_id(), bestrun.get_id(), len(data1), self.alignment_fdr_threshold_))
        print("If you ran the feature_alignment.py script, try to skip the re-alignment step (e.g. remove the --realign_runs option).")
        raise Exception("Not enough datapoints (less than 2 datapoints).")

    # Since we want to predict how to convert from slave to master, slave
    # is first and master is second.
    sm.initialize(data2, data1)
    data2_aligned = sm.predict(data2)

    # Store transformation in collection (from run to bestrun)
    self.transformation_collection.addTransformationData(
        [data2, data1], run.get_id(), bestrun.get_id())
    self.transformation_collection.addTransformedData(
        data2_aligned, run.get_id(), bestrun.get_id())

    stdev = numpy.std(numpy.array(data1) - numpy.array(data2_aligned))
    median = numpy.median(numpy.array(data1) - numpy.array(data2_aligned))
    print("Will align run %s against %s, using %s features" % (
        run.get_id(), bestrun.get_id(), len(data1)))
    print(" Computed stdev %s and median %s" % (stdev, median))

    # Store error for later
    d = self.transformation_error.transformations.get(run.get_id(), {})
    d[bestrun.get_id()] = [stdev, median]
    self.transformation_error.transformations[run.get_id()] = d

    # Now predict on _all_ data and write this back to the data
    i = 0
    all_pg = []
    for prgr in run:
        for pep in prgr:
            all_pg.extend([(pg.get_normalized_retentiontime(), pg.get_feature_id())
                           for pg in pep.get_all_peakgroups()])
    rt_eval = [pg[0] for pg in all_pg]
    aligned_result = sm.predict(rt_eval)

    # ``i`` walks through aligned_result in the same order in which the
    # retention times were collected above.
    for prgr in run:
        for pep in prgr:
            # TODO hack -> direct access to the internal peakgroups object
            mutable = [list(pg) for pg in pep.peakgroups_]
            for entry in mutable:
                # Position 2 of each peakgroup record receives the aligned value.
                entry[2] = aligned_result[i]
                i += 1
            pep.peakgroups_ = [tuple(m) for m in mutable]
def test_duplication(self):
    """De-duplicating and re-duplicating an array must round-trip."""
    original = [0, 0, 5, 6, 6, 7, 8, 8]
    smoother = smoothing.SmoothingPy()

    unique_values, duplication_info = smoother.de_duplicate_array(original)
    restored = smoother.re_duplicate_array(unique_values, duplication_info)

    # the input and output need to be identical!
    self.assertEqual(restored, original)
def getTransformation(self, s_from, s_to):
    """Return the stored transformation from ``s_from`` to ``s_to``.

    Identical source and target yield a fresh ``smoothing.SmoothingNull``;
    a missing entry in ``self.transformations`` yields ``None``.
    """
    if s_from != s_to:
        try:
            per_source = self.transformations[s_from]
            return per_source[s_to]
        except KeyError:
            return None

    # null smoothing
    return smoothing.SmoothingNull()
def test_smooth_spline_r_extern(self):
    """Test the SmoothingRExtern smoother (smoothing spline, external R)."""
    wanted = [
        2.34266247, 7.2926131, 10.48943975, 11.85840597,
        11.85840597, 13.48225519, 7.44184246, 6.61579704,
    ]

    r_smoother = smoothing.SmoothingRExtern()
    r_smoother.initialize(self.data1, self.data2)
    predicted = r_smoother.predict(self.data2)

    self.assertEqual(len(predicted), 8)
    for want, got in zip(wanted, predicted):
        self.assertAlmostEqual(want, got)
def _spline_align_runs(self, bestrun, run, multipeptides):
    """Align ``run`` (slave) against ``bestrun`` (master).

    Fits a smoother from the slave to the master retention times, records
    the fit data, the aligned data and the residual statistics (standard
    deviation and median), and then writes the predicted values back into
    all peakgroups of ``run``.

    Raises:
        Exception: if fewer than two data points are available.
    """
    smoother = smoothing.getSmoothingObj(smoother=self.smoother, tmpdir=self.tmpdir_)

    # Anchor points from the mapping:
    #   master_rt = reference data (bestrun)
    #   slave_rt  = data to be aligned (run)
    master_rt, slave_rt = self._getRTData(bestrun, run, multipeptides)
    run_id = run.get_id()
    best_id = bestrun.get_id()

    if len(slave_rt) < 2:
        print("No common identifications between %s and %s. Only found %s features below a cutoff of %s" % (
            run_id, best_id, len(master_rt), self.alignment_fdr_threshold_))
        print("If you ran the feature_alignment.py script, try to skip the re-alignment step (e.g. remove the --realign_runs option).")
        raise Exception("Not enough datapoints (less than 2 datapoints).")

    # The prediction goes from slave to master, hence slave comes first.
    smoother.initialize(slave_rt, master_rt)
    slave_aligned = smoother.predict(slave_rt)

    # Remember the transformation (from run to bestrun).
    collection = self.transformation_collection
    collection.addTransformationData([slave_rt, master_rt], run_id, best_id)
    collection.addTransformedData(slave_aligned, run_id, best_id)

    residuals = numpy.array(master_rt) - numpy.array(slave_aligned)
    stdev = numpy.std(residuals)
    median = numpy.median(residuals)
    print("Will align run %s against %s, using %s features" % (run_id, best_id, len(master_rt)))
    print(" Computed stdev", stdev, "and median", median)

    # Keep the error for later use.
    error_table = self.transformation_error.transformations
    per_run = error_table.get(run_id, {})
    per_run[best_id] = [stdev, median]
    error_table[run_id] = per_run

    # Predict on _all_ peakgroups of the run ...
    collected = [
        (pg.get_normalized_retentiontime(), pg.get_feature_id())
        for prgr in run
        for pep in prgr
        for pg in pep.get_all_peakgroups()
    ]
    aligned_all = smoother.predict([rt for rt, _ in collected])

    # ... and write the result back, consuming aligned_all in order.
    cursor = 0
    for prgr in run:
        for pep in prgr:
            # TODO hack -> direct access to the internal peakgroups object
            records = [list(pg) for pg in pep.peakgroups_]
            for record in records:
                record[2] = aligned_all[cursor]
                cursor += 1
            pep.peakgroups_ = [tuple(record) for record in records]
def test_smooth_lowess(self):
    """Test the LowessSmoothingBiostats smoother on the fixture data."""
    wanted = [
        4.2123769729879061, 7.3305230831706876, 8.8162015867770727,
        10.144542883530072, 11.507036080352814, 14.061195393451431,
        7.4880128821482463, 5.7476045207327786,
    ]

    lowess = smoothing.LowessSmoothingBiostats()
    lowess.initialize(self.data1, self.data2)
    predicted = lowess.predict(self.data1)

    self.assertEqual(len(predicted), 8)
    for want, got in zip(wanted, predicted):
        self.assertAlmostEqual(want, got)
def test_smooth_spline_scipy_cv(self):
    """Test UnivarSplineCV (univariate spline with crossvalidation)."""
    wanted = [
        4.1626385843797094, 7.3804099239612029, 8.9667396489152544,
        10.384851777100122, 11.316311505414465, 13.994282700490476,
        7.5367306411050095, 5.8580352186337672,
    ]

    spline = smoothing.UnivarSplineCV()
    spline.initialize(self.data1, self.data2)
    predicted = spline.predict(self.data1)

    self.assertEqual(len(predicted), 8)
    # Only a 10% relative tolerance is required here.
    for want, got in zip(wanted, predicted):
        relative_error = abs(1.0 - want / got)
        self.assertTrue(relative_error < 0.1)
def test_smooth_spline_scikit(self):
    """Test SmoothingPy._smooth_spline_scikit (smoothing spline, scikit)."""
    smoother = smoothing.SmoothingPy()

    def check(result, size, indexed_values):
        # Verify the length first, then the selected positions in order.
        self.assertEqual(len(result), size)
        for position, value in indexed_values:
            self.assertAlmostEqual(result[position], value)

    # No explicit evaluation points.
    check(
        smoother._smooth_spline_scikit(self.data1, self.data2),
        8,
        [(0, 4.41118020), (2, 8.7900826361), (5, 14.1276411901), (7, 5.90758396)],
    )

    # Explicit evaluation points, in two different orders.
    check(
        smoother._smooth_spline_scikit(self.data1, self.data2, [5, 7, 10.0]),
        3,
        [(0, 4.6129201633), (1, 7.73837621136), (2, 10.3726686328)],
    )
    check(
        smoother._smooth_spline_scikit(self.data1, self.data2, [10.0, 5.0, 7]),
        3,
        [(0, 10.3726686328), (1, 4.6129201633), (2, 7.73837621136)],
    )

    # Since the (xhat,r) is optimized, different lamda values are estimated
    # and a different estimation is used here...
    check(
        smoother._smooth_spline_scikit(self.data1, self.data2, [10.0, 5.0, 7, 10.00001]),
        4,
        [
            (0, 11.60276344181),
            (1, 4.3279294106253),
            (2, 7.3603529478143),
            (3, 11.60276823628391),
        ],
    )

    # Same points with a repeated value and the extra positional flag set.
    result = smoother._smooth_spline_scikit(self.data1, self.data2, [10.0, 5.0, 7, 10.0], True)
    self.assertEqual(len(result), 4)
    wanted = [
        10.372668632871067, 4.6129201633602159, 7.738376211369804,
        10.372668632871067,
    ]
    for got, want in zip(result, wanted):
        self.assertAlmostEqual(got, want)
def addDataToTrafo(tr_data, run_0, run_1, spl_aligner, multipeptides,
                   realign_method, max_rt_diff, topN=5, sd_max_data_length=5000,
                   force=False):
    """Compute the transformations between two runs and store them in tr_data.

    Both directions (``run_0 -> run_1`` and ``run_1 -> run_0``) are added via
    ``tr_data.addTrafo``. When both runs have the same id, a
    ``smoothing.SmoothingNull`` is registered for both directions.

    Args:
        tr_data: container receiving the ``addData`` / ``addTrafo`` calls.
        run_0: first run; must provide ``get_id()``.
        run_1: second run; must provide ``get_id()``.
        spl_aligner: object whose ``_getRTData(run_0, run_1, multipeptides)``
            returns the two data sequences used to fit the smoothers.
        multipeptides: passed through to ``spl_aligner._getRTData``.
        realign_method: smoother name passed to ``smoothing.getSmoothingObj``.
        max_rt_diff: passed to ``smoothing.getSmoothingObj``.
        topN: passed to ``smoothing.getSmoothingObj``.
        sd_max_data_length: maximal number of randomly sampled data points
            used to estimate the standard deviation of the fit; if it is
            not positive, the estimate is skipped and 0.0 is stored.
        force: if true, a pair without any data gets null transformations
            instead of raising.

    Raises:
        Exception: if no data is available for the pair and ``force`` is
            false.
    """
    id_0 = run_0.get_id()
    id_1 = run_1.get_id()

    if id_0 == id_1:
        null = smoothing.SmoothingNull()
        tr_data.addTrafo(id_0, id_1, null)
        tr_data.addTrafo(id_1, id_0, null)
        return

    # Data
    data_0, data_1 = spl_aligner._getRTData(run_0, run_1, multipeptides)
    tr_data.addData(id_0, data_0, id_1, data_1)

    if len(data_0) == 0:
        print("Warning, zero data! Consider increasing the anchor point cutoff (--alignment_score) to include more peptides.")
        if force:
            null = smoothing.SmoothingNull()
            tr_data.addTrafo(id_0, id_1, null)
            tr_data.addTrafo(id_1, id_0, null)
            return
        else:
            raise Exception("No data available for alignment %s vs %s" % (id_0, id_1))

    # Smoothers (one per direction)
    sm_0_1 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff, min_rt_diff=0.1,
                                       removeOutliers=False, tmpdir=None)
    sm_1_0 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff, min_rt_diff=0.1,
                                       removeOutliers=False, tmpdir=None)

    # Initialize smoother
    sm_0_1.initialize(data_0, data_1)
    sm_1_0.initialize(data_1, data_0)

    # Compute error for alignment (standard deviation)
    stdev_0_1 = 0.0
    stdev_1_0 = 0.0
    if sd_max_data_length > 0:
        # range() instead of xrange() so the code also runs on Python 3.
        sample_idx = random.sample(range(len(data_0)),
                                   min(sd_max_data_length, len(data_0)))
        data_0_s = [data_0[i] for i in sample_idx]
        data_1_s = [data_1[i] for i in sample_idx]
        data0_aligned = sm_0_1.predict(data_0_s)
        stdev_0_1 = numpy.std(numpy.array(data_1_s) - numpy.array(data0_aligned))
        data1_aligned = sm_1_0.predict(data_1_s)
        stdev_1_0 = numpy.std(numpy.array(data_0_s) - numpy.array(data1_aligned))
        print("stdev for", id_0, id_1, stdev_0_1, " / ", stdev_1_0, "on data length", len(data_0_s))

    # Add data and trafo description.
    # The CyLightTransformationData actually requires to get a specific type of
    # transformation, the CyLinearInterpolateWrapper which may not be directly
    # passed to this function. We will try to recover the underlying linear
    # wrapper and then stick it into the tr_data object. If this fails, we just
    # revert to the regular behavior.
    try:
        sm_0_1_lwp = sm_0_1.internal_interpolation.getLWP()
        sm_1_0_lwp = sm_1_0.internal_interpolation.getLWP()
        tr_data.addTrafo(id_0, id_1, sm_0_1_lwp, stdev_0_1)
        tr_data.addTrafo(id_1, id_0, sm_1_0_lwp, stdev_1_0)
    except Exception:
        # Deliberate best-effort fallback: store the smoother objects themselves.
        tr_data.addTrafo(id_0, id_1, sm_0_1, stdev_0_1)
        tr_data.addTrafo(id_1, id_0, sm_1_0, stdev_1_0)
def addDataToTrafo(tr_data, run_0, run_1, spl_aligner, multipeptides,
                   realign_method, max_rt_diff, topN=5, sd_max_data_length=1000):
    """Compute the transformations between two runs and store them in tr_data.

    Both directions (``run_0 -> run_1`` and ``run_1 -> run_0``) are added via
    ``tr_data.addTrafo``. When both runs have the same id, or when no data
    is returned for the pair, a ``smoothing.SmoothingNull`` is registered
    for both directions instead.

    Args:
        tr_data: container receiving the ``addData`` / ``addTrafo`` calls.
        run_0: first run; must provide ``get_id()``.
        run_1: second run; must provide ``get_id()``.
        spl_aligner: object whose ``_getRTData(run_0, run_1, multipeptides)``
            returns the two data sequences used to fit the smoothers.
        multipeptides: passed through to ``spl_aligner._getRTData``.
        realign_method: smoother name passed to ``smoothing.getSmoothingObj``.
        max_rt_diff: passed to ``smoothing.getSmoothingObj``.
        topN: passed to ``smoothing.getSmoothingObj``.
        sd_max_data_length: maximal number of randomly sampled data points
            used to estimate the standard deviation of the fit; if it is
            not positive, the estimate is skipped and 0.0 is stored.
    """
    id_0 = run_0.get_id()
    id_1 = run_1.get_id()

    if id_0 == id_1:
        null = smoothing.SmoothingNull()
        tr_data.addTrafo(id_0, id_1, null)
        tr_data.addTrafo(id_1, id_0, null)
        return

    # Data
    data_0, data_1 = spl_aligner._getRTData(run_0, run_1, multipeptides)
    tr_data.addData(id_0, data_0, id_1, data_1)

    if len(data_0) == 0:
        null = smoothing.SmoothingNull()
        tr_data.addTrafo(id_0, id_1, null)
        tr_data.addTrafo(id_1, id_0, null)
        return

    # Smoothers (one per direction)
    sm_0_1 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff, min_rt_diff=0.1,
                                       removeOutliers=False, tmpdir=None)
    sm_1_0 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff, min_rt_diff=0.1,
                                       removeOutliers=False, tmpdir=None)

    # Initialize smoother
    sm_0_1.initialize(data_0, data_1)
    sm_1_0.initialize(data_1, data_0)

    # Compute error for alignment (standard deviation)
    stdev_0_1 = 0.0
    stdev_1_0 = 0.0
    if sd_max_data_length > 0:
        # range() instead of xrange() so the code also runs on Python 3.
        sample_idx = random.sample(range(len(data_0)),
                                   min(sd_max_data_length, len(data_0)))
        data_0_s = [data_0[i] for i in sample_idx]
        data_1_s = [data_1[i] for i in sample_idx]
        data0_aligned = sm_0_1.predict(data_0_s)
        stdev_0_1 = numpy.std(numpy.array(data_1_s) - numpy.array(data0_aligned))
        data1_aligned = sm_1_0.predict(data_1_s)
        stdev_1_0 = numpy.std(numpy.array(data_0_s) - numpy.array(data1_aligned))
        # One pre-formatted string: a Python 2 print statement is a syntax
        # error on Python 3, and this form emits the same text on both.
        print("stdev for %s %s %s  /  %s on data length %s" % (
            id_0, id_1, stdev_0_1, stdev_1_0, len(data_0_s)))

    # Add data
    tr_data.addTrafo(id_0, id_1, sm_0_1, stdev_0_1)
    tr_data.addTrafo(id_1, id_0, sm_1_0, stdev_1_0)
def addDataToTrafo(tr_data, run_0, run_1, spl_aligner, multipeptides,
                   realign_method, max_rt_diff, topN=5, sd_max_data_length=5000,
                   force=False):
    """Compute the transformations between two runs and store them in tr_data.

    Both directions (``run_0 -> run_1`` and ``run_1 -> run_0``) are added via
    ``tr_data.addTrafo``. When both runs have the same id, a
    ``smoothing.SmoothingNull`` is registered for both directions.

    Args:
        tr_data: container receiving the ``addData`` / ``addTrafo`` calls.
        run_0: first run; must provide ``get_id()``.
        run_1: second run; must provide ``get_id()``.
        spl_aligner: object whose ``_getRTData(run_0, run_1, multipeptides)``
            returns the two data sequences used to fit the smoothers.
        multipeptides: passed through to ``spl_aligner._getRTData``.
        realign_method: smoother name passed to ``smoothing.getSmoothingObj``.
        max_rt_diff: passed to ``smoothing.getSmoothingObj``.
        topN: passed to ``smoothing.getSmoothingObj``.
        sd_max_data_length: maximal number of randomly sampled data points
            used to estimate the standard deviation of the fit; if it is
            not positive, the estimate is skipped and 0.0 is stored.
        force: if true, a pair without any data gets null transformations
            instead of raising.

    Raises:
        Exception: if no data is available for the pair and ``force`` is
            false.
    """
    id_0 = run_0.get_id()
    id_1 = run_1.get_id()

    if id_0 == id_1:
        null = smoothing.SmoothingNull()
        tr_data.addTrafo(id_0, id_1, null)
        tr_data.addTrafo(id_1, id_0, null)
        return

    # Data
    data_0, data_1 = spl_aligner._getRTData(run_0, run_1, multipeptides)
    tr_data.addData(id_0, data_0, id_1, data_1)

    if len(data_0) == 0:
        print("Warning, zero data!")
        if force:
            null = smoothing.SmoothingNull()
            tr_data.addTrafo(id_0, id_1, null)
            tr_data.addTrafo(id_1, id_0, null)
            return
        else:
            raise Exception("No data available for alignment %s vs %s" % (id_0, id_1))

    # Smoothers (one per direction)
    sm_0_1 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff, min_rt_diff=0.1,
                                       removeOutliers=False, tmpdir=None)
    sm_1_0 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff, min_rt_diff=0.1,
                                       removeOutliers=False, tmpdir=None)

    # Initialize smoother
    sm_0_1.initialize(data_0, data_1)
    sm_1_0.initialize(data_1, data_0)

    # Compute error for alignment (standard deviation)
    stdev_0_1 = 0.0
    stdev_1_0 = 0.0
    if sd_max_data_length > 0:
        # range() instead of xrange() so the code also runs on Python 3.
        sample_idx = random.sample(range(len(data_0)),
                                   min(sd_max_data_length, len(data_0)))
        data_0_s = [data_0[i] for i in sample_idx]
        data_1_s = [data_1[i] for i in sample_idx]
        data0_aligned = sm_0_1.predict(data_0_s)
        stdev_0_1 = numpy.std(numpy.array(data_1_s) - numpy.array(data0_aligned))
        data1_aligned = sm_1_0.predict(data_1_s)
        stdev_1_0 = numpy.std(numpy.array(data_0_s) - numpy.array(data1_aligned))
        print("stdev for", id_0, id_1, stdev_0_1, " / ", stdev_1_0, "on data length", len(data_0_s))

    # Add data and trafo description.
    # The CyLightTransformationData actually requires to get a specific type of
    # transformation, the CyLinearInterpolateWrapper which may not be directly
    # passed to this function. We will try to recover the underlying linear
    # wrapper and then stick it into the tr_data object. If this fails, we just
    # revert to the regular behavior.
    try:
        sm_0_1_lwp = sm_0_1.internal_interpolation.getLWP()
        sm_1_0_lwp = sm_1_0.internal_interpolation.getLWP()
        tr_data.addTrafo(id_0, id_1, sm_0_1_lwp, stdev_0_1)
        tr_data.addTrafo(id_1, id_0, sm_1_0_lwp, stdev_1_0)
    except Exception:
        # Deliberate best-effort fallback: store the smoother objects themselves.
        tr_data.addTrafo(id_0, id_1, sm_0_1, stdev_0_1)
        tr_data.addTrafo(id_1, id_0, sm_1_0, stdev_1_0)
def test_smooth_spline_scikit_wrap(self):
    """Test SmoothingPy via initialize/predict and _smooth_scikit_legacy."""
    smoother = smoothing.SmoothingPy()
    import numpy

    def check(result, wanted, *extra):
        # Compare element-wise; ``extra`` is forwarded to assertAlmostEqual.
        for got, want in zip(result, wanted):
            self.assertAlmostEqual(got, want, *extra)

    def legacy(points):
        return smoother._smooth_scikit_legacy(self.data1, self.data2, points)

    xs = numpy.array(self.data1)
    smoother.initialize(self.data1, self.data2, xmin=numpy.min(xs), xmax=numpy.max(xs))
    via_predict = smoother.predict(self.data1)

    via_legacy = legacy(self.data1)
    self.assertEqual(len(via_legacy), 8)
    wanted = [
        4.340432925607892, 7.412884078435387, 8.865581929053054,
        10.157652480992748, 11.196560644581082, 14.158931578788266,
        7.561207213410677, 5.90694677222806,
    ]
    check(via_legacy, wanted)
    check(via_predict, wanted)

    via_legacy = legacy([5, 7, 10.0])
    self.assertEqual(len(via_legacy), 3)
    check(via_legacy, [4.307319862409416, 7.423679061650855, 11.15630836580813])

    via_legacy = legacy([10.0, 5.0, 7])
    self.assertEqual(len(via_legacy), 3)
    check(via_legacy, [11.15630836580813, 4.307319862409416, 7.423679061650855])

    # Here, we expect the exact same results
    via_legacy = legacy([10.0, 5.0, 7, 10.0])
    self.assertEqual(len(via_legacy), 4)
    check(via_legacy, [
        11.15630836580813, 4.307319862409416, 7.423679061650855,
        11.15630836580813,
    ])

    # However, if we also evaluate at a point outside the previous window,
    # we expect the results to change slightly
    via_legacy = legacy([10.0, 5.0, 7, 10.0, 15.0])
    self.assertEqual(len(via_legacy), 5)
    check(via_legacy, [
        11.196560644581082, 4.340432925607892, 7.412884078435387,
        11.196560644581082, 14.158931578788266,
    ])

    # If we chose a point that is very far away, we still expect a
    # "reasonable" result (e.g. -100 becomes -96); compared to 1 place only.
    via_legacy = legacy([10.0, 5.0, 7, -100.0, 15.0])
    self.assertEqual(len(via_legacy), 5)
    check(via_legacy, [
        10.265638884711272, 5.411029040286351, 7.352910860216361,
        -96.53810165019972, 15.119857967104132,
    ], 1)
def test_gettingOperator_rpy2(self):
    """Test getting the correct smoothing operator.

    The default call of ``get_smooting_operator`` must return a
    ``SmoothingR``.
    """
    op = smoothing.get_smooting_operator()
    # assertIsInstance reports the offending object and the expected type
    # on failure, unlike assertTrue(isinstance(...)).
    self.assertIsInstance(op, smoothing.SmoothingR)