def initialize_from_data(self, reverse=False, smoother="lowess", force=False):
    """Use the data in self.transformation_data to create the trafos."""
    import time

    for s_from, darr in self.transformation_data.items():
        self.transformations[s_from] = {}
        for s_to, data in darr.items():
            start = time.time()
            transformed = self.getTransformedData(s_from, s_to)
            if transformed is not None:
                # Pre-computed transformed data exists: interpolate through it.
                sm = smoothing.SmoothingInterpolation()
                sm.initialize(data[0], transformed)
                self._addTransformation(sm, s_from, s_to)
                if reverse:
                    sm_rev = smoothing.SmoothingInterpolation()
                    sm_rev.initialize(transformed, data[0])
                    self._addTransformation(sm_rev, s_to, s_from)
            else:
                # No transformed data yet: fit the requested smoother.
                sm = smoothing.getSmoothingObj(smoother)
                sm.initialize(data[0], data[1])
                self.transformations[s_from][s_to] = sm
                if reverse:
                    sm_rev = smoothing.getSmoothingObj(smoother)
                    sm_rev.initialize(data[1], data[0])
                    self._addTransformation(sm_rev, s_to, s_from)
            print("Took %0.4fs to align %s against %s" %
                  (time.time() - start, s_to, s_from))
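# For reference, every smoother in this file follows the same two-step
# protocol: initialize() fits the mapping on paired retention times, and
# predict() applies it to new values. A minimal usage sketch, assuming the
# smoothing module import used by the surrounding file (import path is an
# assumption, not shown in the original snippet):

import msproteomicstoolslib.math.Smoothing as smoothing  # assumed import path

# Paired retention times of the same peptides observed in two runs.
rt_run_a = [100.0, 200.0, 300.0, 400.0, 500.0]
rt_run_b = [102.0, 204.0, 301.0, 398.0, 503.0]

# Fit a linear smoother mapping run A coordinates onto run B coordinates,
sm = smoothing.getSmoothingObj("linear")
sm.initialize(rt_run_a, rt_run_b)

# then translate new run A retention times into run B's time scale.
print(sm.predict([250.0, 350.0]))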
# Imports assumed from the surrounding module (not shown in this snippet).
import random
import numpy
import msproteomicstoolslib.math.Smoothing as smoothing

def addDataToTrafo(tr_data, run_0, run_1, spl_aligner, multipeptides,
                   realign_method, max_rt_diff, topN=5, sd_max_data_length=1000):
    id_0 = run_0.get_id()
    id_1 = run_1.get_id()

    if id_0 == id_1:
        null = smoothing.SmoothingNull()
        tr_data.addTrafo(id_0, id_1, null)
        tr_data.addTrafo(id_1, id_0, null)
        return

    # Data
    data_0, data_1 = spl_aligner._getRTData(run_0, run_1, multipeptides)
    tr_data.addData(id_0, data_0, id_1, data_1)

    # import pylab
    # pylab.scatter(data_0, data_1)
    # pylab.savefig('data_%s_%s.pdf' % (run_0, run_1) )
    # pylab.clf()
    # pylab.scatter(data_0, data_1)
    # pylab.xlim(2300, 2600)
    # pylab.ylim(2300, 2600)
    # pylab.savefig('data_%s_%s_zoom.pdf' % (run_0, run_1) )
    # pylab.clf()

    if len(data_0) == 0:
        null = smoothing.SmoothingNull()
        tr_data.addTrafo(id_0, id_1, null)
        tr_data.addTrafo(id_1, id_0, null)
        return

    # Smoothers
    sm_0_1 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff,
                                       min_rt_diff=0.1, removeOutliers=False,
                                       tmpdir=None)
    sm_1_0 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff,
                                       min_rt_diff=0.1, removeOutliers=False,
                                       tmpdir=None)
    # Initialize smoother
    sm_0_1.initialize(data_0, data_1)
    sm_1_0.initialize(data_1, data_0)

    # Compute error for alignment (standard deviation)
    stdev_0_1 = 0.0
    stdev_1_0 = 0.0
    if sd_max_data_length > 0:
        # Subsample the anchor points so the error estimate stays cheap.
        sample_idx = random.sample(range(len(data_0)),
                                   min(sd_max_data_length, len(data_0)))
        data_0_s = [data_0[i] for i in sample_idx]
        data_1_s = [data_1[i] for i in sample_idx]
        data0_aligned = sm_0_1.predict(data_0_s)
        stdev_0_1 = numpy.std(numpy.array(data_1_s) - numpy.array(data0_aligned))
        data1_aligned = sm_1_0.predict(data_1_s)
        stdev_1_0 = numpy.std(numpy.array(data_0_s) - numpy.array(data1_aligned))
        print("stdev for", id_0, id_1, stdev_0_1, "/", stdev_1_0,
              "on data length", len(data_0_s))

    # Add data
    tr_data.addTrafo(id_0, id_1, sm_0_1, stdev_0_1)
    tr_data.addTrafo(id_1, id_0, sm_1_0, stdev_1_0)
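# The error estimate above can be reproduced in isolation. A self-contained
# sketch of the subsample-and-residual computation (pure numpy; the function
# name and signature are illustrative, not part of the library):

import random
import numpy

def alignment_stdev(data_from, data_to, predict, max_points=1000):
    """Standard deviation of residuals on a random subsample of anchors."""
    idx = random.sample(range(len(data_from)), min(max_points, len(data_from)))
    xs = [data_from[i] for i in idx]
    ys = [data_to[i] for i in idx]
    # Residuals between the target coordinates and the smoothed prediction.
    return numpy.std(numpy.array(ys) - numpy.array(predict(xs)))

# Example: alignment_stdev(data_0, data_1, sm_0_1.predict, sd_max_data_length)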
    def test_gettingOperator_obj(self):
        """
        Test getting the correct smoothing operator (new interface)
        """

        op = smoothing.getSmoothingObj("diRT")
        self.assertTrue(isinstance(op, smoothing.SmoothingNull))

        op = smoothing.getSmoothingObj("None")
        self.assertTrue(isinstance(op, smoothing.SmoothingNull))

        op = smoothing.getSmoothingObj("linear")
        self.assertTrue(isinstance(op, smoothing.SmoothingLinear))

        op = smoothing.getSmoothingObj("splineR")
        self.assertTrue(isinstance(op, smoothing.SmoothingR))
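# The assertions above pin down the name-to-class contract of
# getSmoothingObj. A hypothetical sketch of that dispatch, implied by the
# tests rather than copied from the library (assumes the same `smoothing`
# import as the tests):

def get_smoothing_obj_sketch(name):
    # "diRT" and "None" both fall back to the identity transformation.
    if name in ("diRT", "None"):
        return smoothing.SmoothingNull()
    if name == "linear":
        return smoothing.SmoothingLinear()
    if name == "splineR":
        return smoothing.SmoothingR()  # needs a working R/rpy2 setup
    raise ValueError("Unknown smoother: %s" % name)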
    def _spline_align_runs(self, bestrun, run, multipeptides):
        """Will align run against bestrun"""

        sm = smoothing.getSmoothingObj(smoother=self.smoother,
                                       tmpdir=self.tmpdir_)

        # get those peptides we want to use for alignment => for this use the mapping
        # data1 = reference data (master)
        # data2 = data to be aligned (slave)
        data1, data2 = self._getRTData(bestrun, run, multipeptides)

        if len(data2) < 2:
            print("No common identifications between %s and %s. Only found %s features below a cutoff of %s" % (
                run.get_id(), bestrun.get_id(), len(data1),
                self.alignment_fdr_threshold_))
            print("If you ran the feature_alignment.py script, try to skip the re-alignment step (e.g. remove the --realign_runs option).")
            raise Exception("Not enough datapoints (less than 2 datapoints).")

        # Since we want to predict how to convert from slave to master, slave
        # is first and master is second.
        sm.initialize(data2, data1)
        data2_aligned = sm.predict(data2)

        # Store transformation in collection (from run to bestrun)
        self.transformation_collection.addTransformationData([data2, data1],
                                                             run.get_id(),
                                                             bestrun.get_id())
        self.transformation_collection.addTransformedData(
            data2_aligned, run.get_id(), bestrun.get_id())

        stdev = numpy.std(numpy.array(data1) - numpy.array(data2_aligned))
        median = numpy.median(numpy.array(data1) - numpy.array(data2_aligned))
        print "Will align run %s against %s, using %s features" % (
            run.get_id(), bestrun.get_id(), len(data1))
        print "  Computed stdev", stdev, "and median", median

        # Store error for later
        d = self.transformation_error.transformations.get(run.get_id(), {})
        d[bestrun.get_id()] = [stdev, median]
        self.transformation_error.transformations[run.get_id()] = d

        # Now predict on _all_ data and write this back to the data
        i = 0
        all_pg = []
        for prgr in run:
            for pep in prgr:
                all_pg.extend([(pg.get_normalized_retentiontime(),
                                pg.get_feature_id())
                               for pg in pep.get_all_peakgroups()])
        rt_eval = [pg[0] for pg in all_pg]
        aligned_result = sm.predict(rt_eval)
        for prgr in run:
            for pep in prgr:
                # TODO hack -> direct access to the internal peakgroups object
                mutable = [list(pg) for pg in pep.peakgroups_]
                for k in range(len(mutable)):
                    mutable[k][2] = aligned_result[i]
                    i += 1
                pep.peakgroups_ = [tuple(m) for m in mutable]
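# The final loop relies on a generic pattern: tuples are immutable, so each
# peakgroup is round-tripped through a list to overwrite one field. A
# self-contained illustration with made-up values (index 2 holds the
# retention time, as in the code above):

peakgroups = [(0.01, "pg_id_1", 250.0), (0.02, "pg_id_2", 350.0)]
aligned_rts = [251.3, 348.9]

mutable = [list(pg) for pg in peakgroups]
for k, rt in enumerate(aligned_rts):
    mutable[k][2] = rt  # overwrite the stored retention time
peakgroups = [tuple(m) for m in mutable]
print(peakgroups)  # [(0.01, 'pg_id_1', 251.3), (0.02, 'pg_id_2', 348.9)]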
    def test_gettingOperator_rpy2(self):
        """
        Test getting the correct smoothing operator
        """
        op = smoothing.get_smooting_operator()
        self.assertTrue(isinstance(op, smoothing.SmoothingR))

        op = smoothing.getSmoothingObj("splineR")
        self.assertTrue(isinstance(op, smoothing.SmoothingR))
def addDataToTrafo(tr_data,
                   run_0,
                   run_1,
                   spl_aligner,
                   multipeptides,
                   realign_method,
                   max_rt_diff,
                   topN=5,
                   sd_max_data_length=5000,
                   force=False):
    id_0 = run_0.get_id()
    id_1 = run_1.get_id()

    if id_0 == id_1:
        null = smoothing.SmoothingNull()
        tr_data.addTrafo(id_0, id_1, null)
        tr_data.addTrafo(id_1, id_0, null)
        return

    # Data
    data_0, data_1 = spl_aligner._getRTData(run_0, run_1, multipeptides)
    tr_data.addData(id_0, data_0, id_1, data_1)

    # import pylab
    # pylab.scatter(data_0, data_1)
    # pylab.savefig('data_%s_%s.pdf' % (run_0, run_1) )
    # pylab.clf()
    # pylab.scatter(data_0, data_1)
    # pylab.xlim(2300, 2600)
    # pylab.ylim(2300, 2600)
    # pylab.savefig('data_%s_%s_zoom.pdf' % (run_0, run_1) )
    # pylab.clf()

    if len(data_0) == 0:
        print("Warning, zero data!")
        if force:
            null = smoothing.SmoothingNull()
            tr_data.addTrafo(id_0, id_1, null)
            tr_data.addTrafo(id_1, id_0, null)
            return
        else:
            raise Exception("No data available for alignment %s vs %s" %
                            (id_0, id_1))

    # Smoothers
    sm_0_1 = smoothing.getSmoothingObj(realign_method,
                                       topN=topN,
                                       max_rt_diff=max_rt_diff,
                                       min_rt_diff=0.1,
                                       removeOutliers=False,
                                       tmpdir=None)
    sm_1_0 = smoothing.getSmoothingObj(realign_method,
                                       topN=topN,
                                       max_rt_diff=max_rt_diff,
                                       min_rt_diff=0.1,
                                       removeOutliers=False,
                                       tmpdir=None)
    # Initialize smoother
    sm_0_1.initialize(data_0, data_1)
    sm_1_0.initialize(data_1, data_0)

    # Compute error for alignment (standard deviation)
    stdev_0_1 = 0.0
    stdev_1_0 = 0.0
    if sd_max_data_length > 0:
        sample_idx = random.sample(range(len(data_0)),
                                   min(sd_max_data_length, len(data_0)))
        data_0_s = [data_0[i] for i in sample_idx]
        data_1_s = [data_1[i] for i in sample_idx]
        data0_aligned = sm_0_1.predict(data_0_s)
        stdev_0_1 = numpy.std(
            numpy.array(data_1_s) - numpy.array(data0_aligned))
        data1_aligned = sm_1_0.predict(data_1_s)
        stdev_1_0 = numpy.std(
            numpy.array(data_0_s) - numpy.array(data1_aligned))
        print("stdev for", id_0, id_1, stdev_0_1, " / ", stdev_1_0,
              "on data length", len(data_0_s))

    # Add data and trafo description.
    # CyLightTransformationData requires a specific type of transformation,
    # the CyLinearInterpolateWrapper, which may not be passed directly to
    # this function. We try to recover the underlying linear wrapper and
    # store it in the tr_data object; if that fails, we fall back to the
    # regular behavior.
    try:
        sm_0_1_lwp = sm_0_1.internal_interpolation.getLWP()
        sm_1_0_lwp = sm_1_0.internal_interpolation.getLWP()
        tr_data.addTrafo(id_0, id_1, sm_0_1_lwp, stdev_0_1)
        tr_data.addTrafo(id_1, id_0, sm_1_0_lwp, stdev_1_0)
    except Exception:
        tr_data.addTrafo(id_0, id_1, sm_0_1, stdev_0_1)
        tr_data.addTrafo(id_1, id_0, sm_1_0, stdev_1_0)
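# Note on the fallback above: the plain (non-Cython) tr_data container treats
# the stored transformation as a duck-typed object, so anything exposing
# predict() can be registered. A hedged sketch of that minimal interface
# (illustrative class, not part of the library):

class IdentityTrafo(object):
    """Stand-in with the same initialize/predict surface as the smoothers."""

    def initialize(self, data_from, data_to):
        pass  # nothing to fit for an identity mapping

    def predict(self, values):
        return list(values)  # map every retention time onto itself

# tr_data.addTrafo(id_0, id_1, IdentityTrafo(), 0.0) would be accepted just
# like smoothing.SmoothingNull() in the zero-data branch above.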