Ejemplo n.º 1
0
 def dump_gender_check(self, ofile):
     """Write a tab-separated gender-check table to ``ofile``.

     For every data sample returned by ``self.pre_load_data()`` the
     connected individual is looked up in a freshly built dependency
     tree, and one row is written for each DataObject listed for that
     sample.  Samples without any DataObject are reported through
     ``self.logger`` and produce no row.

     :param ofile: writable file-like object receiving the TSV output
       (the header line is written first)
     :raises IndexError: if a data sample has no connected object whose
       type is ``self.kb.Individual``
     """
     dt = DependencyTree(self.kb)
     data_samples, ds_to_do = self.pre_load_data()
     fnames = 'individual_id gender path mimetype size sha1'.split()
     tsv = csv.DictWriter(ofile,
                          fnames,
                          delimiter='\t',
                          lineterminator=os.linesep)
     tsv.writeheader()
     individual_klass = self.kb.Individual
     for ds in data_samples:
         connected = dt.get_connected(ds)
         # Exact type comparison is kept on purpose (same selection as
         # before).  A generator replaces ``filter(...)[0]`` because the
         # result of filter() cannot be indexed on Python 3.
         individual = next(
             (x for x in connected if type(x) == individual_klass), None)
         if individual is None:
             # Same exception type as the former ``[0]`` lookup, but with
             # a message that identifies the offending data sample.
             raise IndexError('no Individual connected to %s[%s]' %
                              (ds.label, ds.id))
         # ``in`` replaces dict.has_key(), which no longer exists on
         # Python 3.
         if ds.id in ds_to_do:
             for do in ds_to_do[ds.id]:
                 tsv.writerow({
                     'individual_id': individual.id,
                     'gender': self.gender_str(individual.gender),
                     'path': do.path,
                     'mimetype': do.mimetype,
                     'size': do.size,
                     'sha1': do.sha1,
                 })
         else:
             # Logger.warn is a deprecated alias of Logger.warning.
             self.logger.warning('there is no DataObject for %s[%s]' %
                                 (ds.label, ds.id))
Ejemplo n.º 2
0
 def dump(self, study_label, ofile):
     """Dump the individuals of one study, or of the whole KB.

     When ``study_label`` is ``None`` every ``Individual`` object known
     to the KB is dumped under the label ``"_ALL_"``; otherwise only the
     individuals enrolled in the named study are dumped.

     :param study_label: label of the study to dump, or ``None``
     :param ofile: output file handed over to ``self.dump_individuals``
     :raises ValueError: if ``study_label`` does not name a known study
     """
     if study_label is not None:
         study = self.kb.get_study(study_label)
         if not study:
             # Log first, then abort with the very same message.
             error_text = 'study %s does not exist' % study_label
             self.logger.critical(error_text)
             raise ValueError(error_text)
         enrollments = self.kb.get_enrolled(study)
         individuals = [enrollment.individual for enrollment in enrollments]
         label = study_label
     else:
         individuals = self.kb.get_objects(self.kb.Individual)
         label = "_ALL_"
     tree = DependencyTree(self.kb)
     self.dump_individuals(tree, label, individuals, ofile)
Ejemplo n.º 3
0
    def dump(self, args):
        """Run a user-supplied dump script against the KB.

        The text read from ``args.code_file`` is compiled and executed
        with this method's local names as its namespace.  That namespace
        therefore contains the helpers defined below (``writeheader``,
        ``writerow``, ``Individuals``, ``DataSamples``, ``DataObjects``,
        ``enum_label``) as well as ``self``, ``args``, ``dt``, ``group``,
        ``code`` and ``ccode``; these names are part of what the script
        can see and must not be renamed.

        :param args: object providing ``ofile`` (TSV output stream),
          ``code_file`` (readable stream with the script source) and
          ``group`` (label passed to ``self.kb.get_study``)
        """
        self.ots = None
        self._field_names = []
        self.logger.info('start loading dependency tree')
        dt = DependencyTree(self.kb)
        self.logger.info('done loading dependency tree')

        def writeheader(*field_names):
            """Create the TSV writer for ``field_names`` and emit the header."""
            self._field_names = field_names
            self.ots = csv.DictWriter(args.ofile,
                                      self._field_names,
                                      delimiter='\t')
            self.ots.writeheader()

        def writerow(*field_values):
            """Write one row, pairing values with the declared field names."""
            d = dict(zip(self._field_names, field_values))
            self.ots.writerow(d)

        def Individuals(group):
            """Return ``self.kb.get_individuals(group)``."""
            return self.kb.get_individuals(group)

        def DataSamples(individual, data_sample_klass_name='DataSample'):
            """Return objects of the named KB class connected to ``individual``."""
            klass = getattr(self.kb, data_sample_klass_name)
            return dt.get_connected(individual, aklass=klass)

        def DataObjects(data_sample):
            """Return the DataObject records whose sample is ``data_sample``."""
            q = """select o from DataObject as o join fetch o.sample as s
      where s.id = :sid"""
            return self.kb.find_all_by_query(q, {'sid': data_sample.omero_id})

        def enum_label(x):
            """Return 'MALE' or 'FEMALE' for a matching Gender value.

            Any other value (including non-Gender objects) yields ``None``.
            """
            if isinstance(x, self.kb.Gender):
                if x == self.kb.Gender.MALE:
                    return 'MALE'
                if x == self.kb.Gender.FEMALE:
                    return 'FEMALE'

        code = args.code_file.read()
        group = self.kb.get_study(args.group)
        ccode = compile(code, '<string>', 'exec')
        # SECURITY NOTE(review): this executes arbitrary code taken from
        # args.code_file with full access to ``self`` and the KB; only
        # trusted scripts must ever be passed in.
        # The call form replaces the Python 2 only statement
        # ``exec ccode in locals()`` (a SyntaxError on Python 3); Python 2.7
        # treats the two-element tuple form the same way.
        exec(ccode, locals())
Ejemplo n.º 4
0
 def dump_call_gt(self, ofile):
     """Write a tab-separated table of the collection's data objects.

     For every data sample returned by ``self.pre_load_data()`` the
     single connected individual is looked up in a freshly built
     dependency tree, and one row is written for each DataObject listed
     for that sample.  Samples without any DataObject are reported
     through ``self.logger`` and produce no row.

     :param ofile: writable file-like object receiving the TSV output
       (the header line is written first)
     :raises ValueError: if ``self.data_collection`` is not set
     :raises AssertionError: if a data sample is not connected to
       exactly one individual
     """
     if not self.data_collection:
         raise ValueError('data_collection %s is not known to KB' %
                          self.data_collection)
     dt = DependencyTree(self.kb)
     data_samples, ds_to_do = self.pre_load_data()
     fnames = [
         'dc_id', 'item_id', 'data_sample_label', 'path', 'gender',
         'mimetype', 'size', 'sha1'
     ]
     tsv = csv.DictWriter(ofile,
                          fnames,
                          delimiter='\t',
                          lineterminator=os.linesep)
     tsv.writeheader()
     dc_id = self.data_collection.id
     for ds in data_samples:
         connected = dt.get_connected(ds, self.kb.Individual)
         # Explicit raise instead of ``assert`` so the check is still
         # enforced when Python runs with -O; the exception type is
         # unchanged.
         if len(connected) != 1:
             raise AssertionError(
                 'expected exactly one individual for %s[%s], found %d' %
                 (ds.label, ds.id, len(connected)))
         individual = connected[0]
         # ``in`` replaces dict.has_key(), which no longer exists on
         # Python 3.
         if ds.id in ds_to_do:
             for do in ds_to_do[ds.id]:
                 tsv.writerow({
                     'dc_id': dc_id,
                     'data_sample_label': do.sample.label,
                     'item_id': do.sample.id,
                     'gender': self.gender_str(individual.gender),
                     'path': do.path,
                     'mimetype': do.mimetype,
                     'size': do.size,
                     'sha1': do.sha1,
                 })
         else:
             # Logger.warn is a deprecated alias of Logger.warning.
             self.logger.warning('there is no DataObject for %s[%s]' %
                                 (ds.label, ds.id))
Ejemplo n.º 5
0
 def dump(self, args, ots):
     """Write a random selection of controls and affected individuals.

     Candidate individuals (all of them, or those enrolled in
     ``args.study``) are split into four pools according to their EHR
     (control / affected by ``args.reference_disease``) and gender
     (male / everything else).  From each pool a quota derived from
     ``args.total_number``, ``args.control_fraction`` and
     ``args.male_fraction`` is drawn with ``random.sample`` and one row
     per selected individual is written to ``ots``.

     :param args: object providing ``seed``, ``study``,
       ``required_datasample``, ``reference_disease``,
       ``control_fraction``, ``male_fraction``, ``total_number`` and
       ``group_label``
     :param ots: writer object exposing ``writerow(dict)``
     """
     if args.seed is not None:
         random.seed(args.seed)
     else:
         random.seed()

     if args.study is not None:
         study = self.kb.get_study(args.study)
         if not study:
             self.__critical('study %s does not exist' % args.study)
         individuals = [e.individual for e in self.kb.get_enrolled(study)]
     else:
         individuals = self.kb.get_objects(self.kb.Individual)

     # The dependency tree is only needed when candidates must own a
     # data sample of the requested kind.
     tech = None
     if args.required_datasample:
         tech = getattr(self.kb, args.required_datasample)
         self.logger.info('start loading dependency tree')
         dt = DependencyTree(self.kb)
         self.logger.info('done loading dependency tree')

     atype_control = 'openEHR-EHR-EVALUATION.exclusion-problem_diagnosis.v1'
     atype_diagnosis = 'openEHR-EHR-EVALUATION.problem-diagnosis.v1'
     field = 'at0002.1'

     # Candidate pools keyed by (status, sex).
     pools = {
         ('controls', 'male'): [],
         ('controls', 'female'): [],
         ('affected', 'male'): [],
         ('affected', 'female'): [],
     }
     self.logger.debug('reference_disease: %s' % args.reference_disease)
     for candidate in individuals:
         if tech and not dt.get_connected(candidate, tech):
             continue
         ehr = self.kb.get_ehr(candidate)
         self.logger.debug('ehr: %s' % ehr.recs)
         if ehr.matches(atype_control):
             status = 'controls'
         elif ehr.matches(atype_diagnosis, field, args.reference_disease):
             status = 'affected'
         else:
             # Neither a control nor affected by the reference disease.
             continue
         # Anything that is not MALE lands in the female pool.
         if candidate.gender == self.kb.Gender.MALE:
             sex = 'male'
         else:
             sex = 'female'
         pools[(status, sex)].append(candidate)

     # TODO add checks for round-off effects
     n_affected = int((1.0 - args.control_fraction) * args.total_number)
     n_controls = int(args.control_fraction * args.total_number)
     quotas = {
         ('affected', 'male'): int(n_affected * args.male_fraction),
         ('affected', 'female'): int(n_affected *
                                     (1.0 - args.male_fraction)),
         ('controls', 'male'): int(n_controls * args.male_fraction),
         ('controls', 'female'): int(n_controls *
                                     (1.0 - args.male_fraction)),
     }

     # Availability checks, in the order: affected first, then controls.
     availability_checks = (
         ('Requested %d affected males out of %d', ('affected', 'male')),
         ('Requested %d affected females out of %d', ('affected', 'female')),
         ('Requested %d controls males out of %d', ('controls', 'male')),
         ('Requested %d controls females out of %d', ('controls', 'female')),
     )
     for template, key in availability_checks:
         wanted, available = quotas[key], len(pools[key])
         if wanted > available:
             self.__critical(template % (wanted, available))

     # Sampling order (controls first, then affected) fixes both the
     # sequence of random draws and the order of the output rows.
     sampling_plan = (
         ('selected %d male controls out of %d', ('controls', 'male')),
         ('selected %d female controls out of %d', ('controls', 'female')),
         ('selected %d male affected out of %d', ('affected', 'male')),
         ('selected %d female affected out of %d', ('affected', 'female')),
     )
     all_individuals = []
     for template, key in sampling_plan:
         chosen = random.sample(pools[key], quotas[key])
         self.logger.info(template % (len(chosen), len(pools[key])))
         all_individuals.extend(chosen)

     for position, indy in enumerate(all_individuals):
         ots.writerow({
             'group': args.group_label,
             'group_code': 'I%04d' % position,
             'individual': indy.id,
         })
Ejemplo n.º 6
0
 def update_dependency_tree(self):
     """Build a new ``DependencyTree`` from this object and keep it in ``self.dt``."""
     fresh_tree = DependencyTree(self)
     self.dt = fresh_tree