def set_family_info(self):
    """
    Build the per-family genotype filters and the matching family ids.

    Every family returned by ``Family.from_cursor(self.gq.c)`` is stored on
    ``self.families``.  ``self.family_masks`` and ``self.family_ids`` are
    reset and then filled in lock-step, one entry per family:

    * the mask is the value returned by the family method whose name is
      ``self.model`` (called with ``gt_ll``, ``min_depth`` and the
      model-specific keyword arguments assembled below);
    * when ``args.families`` is set and does not list the family, the mask
      is the literal string ``'False'`` instead.
    """
    self.families = families = Family.from_cursor(self.gq.c).values()
    opts = self.args
    model = self.model

    self.family_ids = []
    self.family_masks = []

    # Default keyword arguments; the mendel_violations model replaces them
    # wholesale, so it receives neither 'min_gq' nor the extra flags below.
    filter_kwargs = {
        'only_affected': not getattr(opts, "allow_unaffected", False),
        'min_gq': opts.min_gq,
    }
    if model == "mendel_violations":
        filter_kwargs = {'only_affected': opts.only_affected}
    elif model == "comp_het":
        filter_kwargs['pattern_only'] = opts.pattern_only
    else:
        filter_kwargs['strict'] = not opts.lenient

    # None means "no restriction": every family gets a real filter.
    wanted = set(opts.families.split(",")) if opts.families else None

    for fam in families:
        if wanted is not None and fam.family_id not in wanted:
            mask = 'False'
        else:
            # e.g. family.auto_rec(gt_ll, min_depth)
            mask = getattr(fam, model)(gt_ll=opts.gt_phred_ll,
                                       min_depth=opts.min_sample_depth,
                                       **filter_kwargs)

        self.family_masks.append(mask)
        self.family_ids.append(fam.family_id)
def set_family_info(self):
    """
    Populate ``self.families``, ``self.family_masks`` and ``self.family_ids``.

    Families come from ``Family.from_cursor(self.gq.c)``.  For each one, a
    genotype filter is obtained by calling the family method named by
    ``self.model``; families excluded by ``args.families`` get the literal
    string ``'False'`` as their filter.  The two result lists are parallel:
    entry *i* of ``family_masks`` belongs to entry *i* of ``family_ids``.
    """
    self.families = families = Family.from_cursor(self.gq.c).values()
    options = self.args
    model_name = self.model

    self.family_ids = []
    self.family_masks = []

    # Keyword arguments that depend on the inheritance model in use.
    extra = {'only_affected': not getattr(options, "allow_unaffected", False)}
    if model_name == "mendel_violations":
        # This model takes its own only_affected flag and nothing else.
        extra = {'only_affected': options.only_affected}

    if model_name == "comp_het":
        extra['pattern_only'] = options.pattern_only
    elif model_name != "mendel_violations":
        extra['strict'] = not options.lenient

    if options.families:
        requested = set(options.families.split(","))
    else:
        requested = None

    for family in families:
        is_selected = requested is None or family.family_id in requested
        if is_selected:
            # e.g. family.auto_rec(gt_ll, min_depth)
            build_filter = getattr(family, model_name)
            family_filter = build_filter(
                gt_ll=options.gt_phred_ll,
                min_depth=options.min_sample_depth,
                **extra)
        else:
            family_filter = 'False'

        self.family_masks.append(family_filter)
        self.family_ids.append(family.family_id)
def get_families(db, selected_families=None):
    """
    Return the Family objects stored in the database *db*.

    Args:
        db: value passed straight to ``sqlite3.connect``.
        selected_families: optional comma-separated string of family ids.
            When given, only those families are returned; ``None`` returns
            every family.

    Returns:
        A list of the values of ``Family.from_cursor(...)``, in the
        iteration order of that mapping, restricted to the selected ids.

    Exits the program via ``sys.exit`` if any requested id is not a key of
    the loaded families.
    """
    conn = sqlite3.connect(db)
    conn.isolation_level = None
    conn.row_factory = sqlite3.Row
    c = conn.cursor()
    # NOTE(review): the connection is never closed here; confirm that the
    # Family objects do not keep using the cursor before adding a close().
    families_dict = Family.from_cursor(c)

    if selected_families is None:
        return [families_dict[fam] for fam in families_dict]

    # if the user has specified a set of selected families
    # to which the analysis should be restricted, then
    # first sanity check that the family ids they specified are valid.
    requested = selected_families.split(',')
    for family in requested:
        if family not in families_dict:
            sys.exit("ERROR: family \"%s\" is not a valid family_id\n" % family)

    # Compare against the parsed ids, not the raw string: `fam in "10,11"`
    # is a substring test and would also match a family called "1".
    requested = set(requested)
    return [families_dict[fam] for fam in families_dict if fam in requested]
def candidates(self):
    """
    Generate the filtered site-pair candidates for each group.

    For every ``(grp, li)`` produced by ``self.gen_candidates('gene')``,
    each row in ``li`` is wrapped in a ``Site``.  Every ordered pair of
    sites (earlier row first) is then passed to ``fam.comp_het_pair`` for
    each family, and results whose ``'candidate'`` entry is truthy are
    collected under the ``(site1, site2)`` key.

    Families come from ``Family.from_cursor(self.gq.conn)`` and are also
    stored, unfiltered, on ``self.fams``; when ``args.families`` is set only
    the listed family ids are evaluated.

    Yields:
        ``(grp, self.filter_candidates(samples_w_hetpair))`` once per group.
    """
    args = self.args

    self.gq._connect_to_database()
    fams = self.fams = Family.from_cursor(self.gq.conn)

    if args.families:
        # Parse the requested ids once rather than once per family.
        requested = set(args.families.split(","))
        fams = {f: fam for f, fam in fams.items() if f in requested}

    for grp, li in self.gen_candidates('gene'):
        samples_w_hetpair = defaultdict(list)
        sites = []
        for row in li:
            gt_types, gt_bases, gt_phases = row['gt_types'], row['gts'], row['gt_phases']
            site = Site(row)
            site.gt_phases, site.gt_bases, site.gt_types = gt_phases, gt_bases, gt_types
            # str(site) is the key used for the impossible-site bookkeeping.
            sites.append((str(site), site))

        for fam in fams.values():
            # if a site has been deemed "impossible", we store and then
            # skip it to avoid computational overhead on it multiple times.
            # The record is per family: it starts empty for each one.
            impossible_sites = {}
            for i, (strsite1, site1) in enumerate(sites[:-1], start=1):
                if strsite1 in impossible_sites:
                    continue

                # sites[i:] are the sites that come after site1.
                for (strsite2, site2) in sites[i:]:
                    if strsite2 in impossible_sites:
                        continue

                    ch = fam.comp_het_pair(site1.gt_types, site1.gt_bases,
                                           site2.gt_types, site2.gt_bases,
                                           site1.gt_phases, site2.gt_phases,
                                           ref1=site1.row['ref'],
                                           alt1=site1.row['alt'],
                                           ref2=site2.row['ref'],
                                           alt2=site2.row['alt'],
                                           allow_unaffected=args.allow_unaffected,
                                           fast_mode=True,
                                           pattern_only=args.pattern_only)

                    if ch.get('impossible') == 'site1':
                        # No partner can rescue site1: drop the rest of
                        # its pairings for this family.
                        impossible_sites[strsite1] = True
                        break
                    if ch.get('impossible') == 'site2':
                        impossible_sites[strsite2] = True

                    if not ch['candidate']:
                        continue

                    samples_w_hetpair[(site1, site2)].append(ch)
        yield grp, self.filter_candidates(samples_w_hetpair)
def candidates(self):
    """
    Generate the filtered site-pair candidates for each group.

    For every ``(grp, li)`` produced by ``self.gen_candidates('gene')``,
    each row in ``li`` becomes a ``Site``.  Each pair of sites (earlier row
    first) is passed to ``fam.comp_het_pair`` for every family, and results
    whose ``'candidate'`` entry is truthy are collected under the
    ``(site1, site2)`` key.

    Families come from ``Family.from_cursor(self.gq.c)`` and are also
    stored, unfiltered, on ``self.fams``; when ``args.families`` is set only
    the listed family ids are evaluated.

    Yields:
        ``(grp, self.filter_candidates(samples_w_hetpair))`` once per group.
    """
    from itertools import combinations

    args = self.args

    self.gq._connect_to_database()
    fams = self.fams = Family.from_cursor(self.gq.c)

    if args.families:
        # Parse the requested ids once rather than once per family.
        requested = set(args.families.split(","))
        fams = {f: fam for f, fam in fams.items() if f in requested}

    for grp, li in self.gen_candidates('gene'):
        samples_w_hetpair = defaultdict(list)
        sites = []
        for row in li:
            gt_types, gt_bases, gt_phases = row['gt_types'], row['gts'], row['gt_phases']
            site = Site(row)
            site.gt_phases, site.gt_bases, site.gt_types = gt_phases, gt_bases, gt_types
            sites.append(site)

        # combinations() yields (sites[a], sites[b]) with a < b in
        # lexicographic index order, i.e. the same pairs, in the same
        # order, as a nested "for each later site" index loop.
        for site1, site2 in combinations(sites, 2):
            for fam in fams.values():
                ch = fam.comp_het_pair(site1.gt_types, site1.gt_bases,
                                       site2.gt_types, site2.gt_bases,
                                       site1.gt_phases, site2.gt_phases,
                                       ref1=site1.row['ref'],
                                       alt1=site1.row['alt'],
                                       ref2=site2.row['ref'],
                                       alt2=site2.row['alt'],
                                       allow_unaffected=args.allow_unaffected,
                                       fast_mode=True,
                                       pattern_only=args.pattern_only)
                if ch['candidate']:
                    samples_w_hetpair[(site1, site2)].append(ch)
        yield grp, self.filter_candidates(samples_w_hetpair)
def get_families(db, selected_families=None):
    """
    Return the Family objects stored in the database *db*.

    Args:
        db: value passed straight to ``database.get_session_metadata``.
        selected_families: optional comma-separated string of family ids.
            When given, only those families are returned; ``None`` returns
            every family.

    Returns:
        A list of the values of ``Family.from_cursor(conn)``, in the
        iteration order of that mapping, restricted to the selected ids.

    Raises:
        ValueError: if a requested id is not a key of the loaded families.
    """
    # Only the connection is needed here; the metadata half is unused.
    conn, _metadata = database.get_session_metadata(db)
    families_dict = Family.from_cursor(conn)

    if selected_families is None:
        return [families_dict[fam] for fam in families_dict]

    # if the user has specified a set of selected families
    # to which the analysis should be restricted, then
    # first sanity check that the family ids they specified are valid.
    requested = selected_families.split(',')
    for family in requested:
        if family not in families_dict:
            raise ValueError("Family \"%s\" is not a valid family_id\n" % family)

    # Compare against the parsed ids, not the raw string: `fam in "10,11"`
    # is a substring test and would also match a family called "1".
    requested = set(requested)
    return [families_dict[fam] for fam in families_dict if fam in requested]
def candidates(self):
    """
    Generate the filtered site-pair candidates for each group.

    Groups come from ``self.gen_candidates('gene')``.  Within a group every
    row is turned into a ``Site``; then, family by family, each pair of
    sites (earlier row first) is evaluated with ``fam.comp_het_pair``.
    Results with a truthy ``'candidate'`` entry are accumulated per
    ``(site1, site2)`` pair.

    Families are read with ``Family.from_cursor(self.gq.conn)`` and kept,
    unfiltered, on ``self.fams``; ``args.families`` (comma-separated ids)
    narrows the set that is actually evaluated.

    Yields:
        ``(grp, self.filter_candidates(samples_w_hetpair))`` once per group.
    """
    args = self.args

    self.gq._connect_to_database()
    fams = self.fams = Family.from_cursor(self.gq.conn)

    if args.families:
        # Build the id set a single time; it does not change per family.
        keep = set(args.families.split(","))
        fams = {f: fam for f, fam in fams.items() if f in keep}

    for grp, li in self.gen_candidates('gene'):
        samples_w_hetpair = defaultdict(list)
        sites = []
        for row in li:
            gt_types, gt_bases, gt_phases = row['gt_types'], row[
                'gts'], row['gt_phases']
            site = Site(row)
            site.gt_phases, site.gt_bases, site.gt_types = gt_phases, gt_bases, gt_types
            # Keep the string form next to the site; it keys impossible_sites.
            sites.append((str(site), site))

        for fam in fams.values():
            # if a site has been deemed "impossible", we store and then
            # skip it to avoid computational overhead on it multiple times.
            # A fresh record is started for every family.
            impossible_sites = {}
            for i, (strsite1, site1) in enumerate(sites[:-1], start=1):
                if strsite1 in impossible_sites:
                    continue

                # Pair site1 only with the sites that follow it.
                for (strsite2, site2) in sites[i:]:
                    if strsite2 in impossible_sites:
                        continue

                    ch = fam.comp_het_pair(
                        site1.gt_types,
                        site1.gt_bases,
                        site2.gt_types,
                        site2.gt_bases,
                        site1.gt_phases,
                        site2.gt_phases,
                        ref1=site1.row['ref'],
                        alt1=site1.row['alt'],
                        ref2=site2.row['ref'],
                        alt2=site2.row['alt'],
                        allow_unaffected=args.allow_unaffected,
                        fast_mode=True,
                        pattern_only=args.pattern_only)

                    if ch.get('impossible') == 'site1':
                        # site1 is ruled out for this family: abandon its
                        # remaining pairings.
                        impossible_sites[strsite1] = True
                        break
                    if ch.get('impossible') == 'site2':
                        impossible_sites[strsite2] = True

                    if not ch['candidate']:
                        continue

                    samples_w_hetpair[(site1, site2)].append(ch)
        yield grp, self.filter_candidates(samples_w_hetpair)