Beispiel #1
0
    def test_base(self):
        """Exercise PedWriter on study 'TEST01' and marker set 'FakeTaqSet01'.

        For every individual of the study, the single GenotypeDataSample
        defined on the marker set is looked up; the resulting mapping is
        then handed to the writer together with the study's individuals.
        """
        def extract_data_sample(group, mset, dsample_name):
            """Return a dict: individual id -> its one data sample on mset.

            Fails with an AssertionError if an individual does not have
            exactly one matching data sample.
            """
            by_individual = {}
            for i in self.kb.get_individuals(group):
                # A list comprehension (rather than filter()) always yields
                # a real list, so len() and indexing also work on Python 3,
                # where filter() returns a lazy iterator.
                gds = [x for x in self.kb.get_data_samples(i, dsample_name)
                       if x.snpMarkersSet == mset]
                assert len(gds) == 1, (
                    "expected exactly one data sample for individual %s, "
                    "found %d" % (i.id, len(gds)))
                by_individual[i.id] = gds[0]
            return by_individual

        study = self.kb.get_study('TEST01')
        family = self.kb.get_individuals(study)
        mset = self.kb.get_snp_markers_set(label='FakeTaqSet01')
        gds_by_individual = extract_data_sample(study, mset,
                                                'GenotypeDataSample')

        pw = PedWriter(mset, base_path="./foo")
        try:
            pw.write_map()
            pw.write_family(study.id, family, gds_by_individual)
        finally:
            # Always close the writer, even if one of the write steps raises.
            pw.close()
    def test_base(self):
        """Exercise PedWriter on study "TEST01" and marker set "FakeTaqSet01".

        For every individual of the study, the single GenotypeDataSample
        defined on the marker set is looked up; the resulting mapping is
        then handed to the writer together with the study's individuals.
        """
        def extract_data_sample(group, mset, dsample_name):
            """Return a dict: individual id -> its one data sample on mset.

            Fails with an AssertionError if an individual does not have
            exactly one matching data sample.
            """
            by_individual = {}
            for i in self.kb.get_individuals(group):
                # Build a real list instead of calling filter(): on Python 3
                # filter() returns an iterator, which supports neither len()
                # nor indexing.
                gds = [
                    x
                    for x in self.kb.get_data_samples(i, dsample_name)
                    if x.snpMarkersSet == mset
                ]
                assert len(gds) == 1, (
                    "expected exactly one data sample for individual %s, "
                    "found %d" % (i.id, len(gds))
                )
                by_individual[i.id] = gds[0]
            return by_individual

        study = self.kb.get_study("TEST01")
        family = self.kb.get_individuals(study)
        mset = self.kb.get_snp_markers_set(label="FakeTaqSet01")
        gds_by_individual = extract_data_sample(study, mset, "GenotypeDataSample")

        pw = PedWriter(mset, base_path="./foo")
        try:
            pw.write_map()
            pw.write_family(study.id, family, gds_by_individual)
        finally:
            # Always close the writer, even if one of the write steps raises.
            pw.close()
  # Register a data object for this data sample, passing along the probs
  # and conf values (the enclosing loop is outside this excerpt).
  do = kb.add_gdo_data_object(action, data_sample, probs, conf)
  # Index the data sample by the individual's id for later lookup.
  data_sample_by_id[ind.id] = data_sample

""" ..

Note how we first create a DataSample object (GenotypeDataSample)
which basically keeps track of the fact that there exists a genotyping
data set defined on a given snp markers set, and then we provide an
actual data object that describes the physical object that contains
the real data. The idea is that there could be many data-equivalent
instances that link to the same DataSample, e.g., on different file
systems, in different formats and so on.
"""

""" ..

As an example, we will now write out the information we have just
saved as a plink pedfile.

"""
from bl.vl.genotype.io import PedWriter

# Create a writer bound to the marker set; base_path is presumably the
# prefix of the files it produces (confirm against PedWriter).
pw = PedWriter(mset, base_path="/tmp/foo")

# Write the map part of the output for the marker set.
pw.write_map()

# Write the individuals in `family` as a single family labelled with the
# study label; data_sample_by_id maps an individual's id to its data sample.
family_label = study.label
pw.write_family(family_label, family, data_sample_by_id)

# Nothing more to write: close the writer.
pw.close()
Beispiel #4
0
  # Look up the requested marker set; abort the program if the kb has none
  # with that label.
  ms = kb.get_snp_markers_set(label=args.marker_set)
  if ms is None:
    msg = "marker set %s not present in kb, bailing out" % args.marker_set
    logger.critical(msg)
    sys.exit(msg)
  # Select every GenotypeDataSample whose snpMarkersSet id equals this
  # marker set's omero_id.
  query = "from GenotypeDataSample g where g.snpMarkersSet.id = :id"
  params = {"id": ms.omero_id}
  gds = kb.find_all_by_query(query, params)
  logger.info("found %d data samples for marker set %s" %
              (len(gds), args.marker_set))
  logger.info("updating dep tree")
  kb.update_dependency_tree()
  # individuals[k] is whatever get_individual() returns for gds[k]; the two
  # lists are zipped to build an individual-id -> data-sample lookup.
  individuals = [get_individual(kb, ds) for ds in gds]
  ds_by_ind_id = dict((i.id, ds) for i, ds in zip(individuals, gds))
  logger.info("getting families")
  families = get_all_families(kb)
  ped_writer = PedWriter(ms, base_path=args.prefix)
  logger.info("writing map file")
  ped_writer.write_map()
  logger.info("writing ped file")
  for i, fam in enumerate(families):
    # Skip a family when the lookups for its members yield exactly {None},
    # i.e. no member has a data sample. An empty family yields an empty set
    # and is therefore NOT skipped.
    # The `i` inside the generator expression is local to that expression;
    # the `i` used for the label below is still the enumerate() index.
    if set(ds_by_ind_id.get(i.id) for i in fam) != set([None]):
      # Labels follow the position in `families`, so skipped families leave
      # gaps in the FAM_<n> numbering.
      fam_label = "FAM_%d" % (i+1)
      logger.info("writing family %s" % fam_label)
      ped_writer.write_family(fam_label, fam, ds_by_ind_id)
  # NOTE(review): ped_writer.close() is never called in the visible code,
  # although other PedWriter usages close the writer -- confirm whether the
  # output is guaranteed to be flushed without it.
  logger.info("all finished")


# Script entry point: run main() with the command-line arguments, excluding
# the program name.
if __name__ == "__main__":
  main(sys.argv[1:])
    # Create a GenotypeDataSample through the kb factory and save it; the
    # conf passed here comes from code outside this excerpt.
    data_sample = kb.factory.create(kb.GenotypeDataSample, conf).save()
    # NOTE: this rebinds conf to the second value returned by
    # make_fake_data(), so it no longer refers to the value used above.
    probs, conf = make_fake_data(mset)
    # Register a data object for the data sample, passing probs and conf.
    do = kb.add_gdo_data_object(action, data_sample, probs, conf)
    # Index the data sample by the individual's id for later lookup.
    data_sample_by_id[ind.id] = data_sample
""" ..

Note how we first create a DataSample object (GenotypeDataSample)
which basically keeps track of the fact that there exists a genotyping
data set defined on a given snp markers set, and then we provide an
actual data object that describes the physical object that contains
the real data. The idea is that there could be many data-equivalent
instances that link to the same DataSample, e.g., on different file
systems, in different formats and so on.
"""
""" ..

As an example, we will now write out the information we have just
saved as a plink pedfile.

"""
from bl.vl.genotype.io import PedWriter

# Create a writer bound to the marker set; base_path is presumably the
# prefix of the files it produces (confirm against PedWriter).
pw = PedWriter(mset, base_path="/tmp/foo")

# Write the map part of the output for the marker set.
pw.write_map()

# Write the individuals in `family` as a single family labelled with the
# study label; data_sample_by_id maps an individual's id to its data sample.
family_label = study.label
pw.write_family(family_label, family, data_sample_by_id)

# Nothing more to write: close the writer.
pw.close()