コード例 #1
0
 def test_load_families(self):
     """ check that load_families returns the expected list of families
     
     First compares the families loaded from self.path against a single
     hand-built trio, then appends a second family (with two children) via
     self.tempfile and checks that both families are returned.
     """
     
     # hand-build the family that the initial ped file is expected to yield
     first_family = Family("fam_ID")
     first_family.add_child("proband", 'dad', 'mom', 'F', '2', "/path/to/proband_vcf.gz")
     first_family.add_mother("mom", '0', '0', 'F', '1', "/path/to/mom_vcf.gz")
     first_family.add_father("dad", '0', '0', 'M', '1',  "/path/to/dad_vcf.gz")
     
     loaded = load_families(self.path)
     self.assertEqual(loaded, [first_family])
     
     # append a second family with a proband plus a sibling
     # (presumably self.tempfile is the file behind self.path)
     extra_lines = (
         "fam_ID2  proband2 dad2  mom2  F  2  /path/to/proband2_vcf.gz\n",
         "fam_ID2  dad2     0     0     M  1  /path/to/dad2_vcf.gz\n",
         "fam_ID2  mom2     0     0     F  1  /path/to/mom2_vcf.gz\n",
         "fam_ID2  sib      dad2  mom2  F  2  /path/to/sib_vcf.gz\n",
     )
     for ped_line in extra_lines:
         self.tempfile.write(ped_line)
     self.tempfile.flush()
     
     # hand-build the matching object for the second family
     second_family = Family("fam_ID2")
     second_family.add_child("proband2", 'dad2', 'mom2', 'F', '2', "/path/to/proband2_vcf.gz")
     second_family.add_child("sib", 'dad2', 'mom2', 'F', '2', "/path/to/sib_vcf.gz")
     second_family.add_mother("mom2", '0', '0', 'F', '1', "/path/to/mom2_vcf.gz")
     second_family.add_father("dad2", '0', '0', 'M', '1', "/path/to/dad2_vcf.gz")
     
     # sort both sides so the comparison does not depend on load order
     observed = sorted(load_families(self.path))
     expected = sorted([first_family, second_family])
     self.assertEqual(observed, expected)
コード例 #2
0
 def test_load_families(self):
     """ check that load_families returns the expected mapping of families
     
     First compares the mapping loaded from self.path against a single
     hand-built trio keyed by family ID, then appends a second family (with
     two children) via self.tempfile and checks both families are returned.
     """
     
     # hand-build the family that the initial ped file is expected to yield
     first_family = Family("fam_ID")
     first_family.add_child("proband", "/path/to/proband_vcf.gz", "2", "F")
     first_family.add_mother("mom", "/path/to/mom_vcf.gz", "1", "F")
     first_family.add_father("dad", "/path/to/dad_vcf.gz", "1", "M")
     
     loaded = load_families(self.path)
     self.assertEqual(loaded, {"fam_ID": first_family})
     
     # append a second family with a proband plus a sibling
     # (presumably self.tempfile is the file behind self.path)
     extra_lines = (
         "fam_ID2  proband2 dad2  mom2  F  2  /path/to/proband2_vcf.gz\n",
         "fam_ID2  dad2     0     0     M  1  /path/to/dad2_vcf.gz\n",
         "fam_ID2  mom2     0     0     F  1  /path/to/mom2_vcf.gz\n",
         "fam_ID2  sib      dad2  mom2  F  2  /path/to/sib_vcf.gz\n",
     )
     for ped_line in extra_lines:
         self.tempfile.write(ped_line)
     self.tempfile.flush()
     
     # hand-build the matching object for the second family
     second_family = Family("fam_ID2")
     second_family.add_child("proband2", "/path/to/proband2_vcf.gz", "2", "F")
     second_family.add_child("sib", "/path/to/sib_vcf.gz", "2", "F")
     second_family.add_mother("mom2", "/path/to/mom2_vcf.gz", "1", "F")
     second_family.add_father("dad2", "/path/to/dad2_vcf.gz", "1", "M")
     
     # compare as sets so the check is independent of the mapping's order
     loaded = load_families(self.path)
     self.assertEqual(set(loaded.values()), {first_family, second_family})
コード例 #3
0
def split_pedigree_file(tempdir, ped_path, number_of_jobs, exclude_parents,
                        use_singletons_with_parents):
    """ split the ped file into multiple smaller ped files

    Families are written, in the order they were loaded, to sequentially
    numbered files ("1.ped", "2.ped", ...) inside tempdir. Every output file
    is closed before the function returns, so the files are complete on disk
    for whatever reads them next.

    Args:
        tempdir: directory that the split ped files are written into
        ped_path: path to pedigree file
        number_of_jobs: how many computational jobs to split the families over.
            Note that because families are kept whole and the per-file quota
            is fractional, we might get more files than this
        exclude_parents: true/false for whether to exclude parents from the
            run. Not used by this function.
        use_singletons_with_parents: if false, families for which
            is_singleton_without_parents() is true are dropped before
            splitting.

    Returns:
        The number of files that the cohort has been split across (which will
        now be the number of jobs to run). Note that this is 1 even when no
        families remain, in which case no file is written.
    """

    families = load_families(ped_path)

    if not use_singletons_with_parents:
        families = [x for x in families if not is_singleton_without_parents(x)]

    # figure out how many families to include per file, in order to make the
    # correct number of jobs
    max_families = float(len(families)) / float(number_of_jobs)

    files_n = 1
    families_n = 0
    output_file = None
    try:
        for family in families:
            families_n += 1

            if families_n > max_families:
                files_n += 1
                families_n = 1

            if families_n == 1:
                # starting a new file: release the previous handle first so
                # that its buffered lines are flushed to disk
                if output_file is not None:
                    output_file.close()
                output_file = open(
                    os.path.join(tempdir, "{}.ped".format(files_n)), "w")

            for person in family:
                # iterating a family can yield None entries; skip those
                if person is None:
                    continue

                line = '{}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(
                    person.family_id, person.get_id(),
                    person.dad_id, person.mom_id, person.get_gender(),
                    person.get_affected_status(), person.get_path())
                output_file.write(line)
    finally:
        # always close the last file, even if writing failed part-way
        if output_file is not None:
            output_file.close()

    return files_n
コード例 #4
0
def split_pedigree_file(tempdir, ped_path, number_of_jobs, exclude_parents, use_singletons_with_parents):
    """ split the ped file into multiple smaller ped files
    
    Families are written, in the order they were loaded, to sequentially
    numbered files ("1.ped", "2.ped", ...) inside tempdir. Every output file
    is closed before the function returns, so the files are complete on disk
    for whatever reads them next.
    
    Args:
        tempdir: directory that the split ped files are written into
        ped_path: path to pedigree file
        number_of_jobs: how many computational jobs to split the families over.
            Note that because families are kept whole and the per-file quota
            is fractional, we might get more files than this
        exclude_parents: true/false for whether to exclude parents from the
            run. Not used by this function.
        use_singletons_with_parents: if false, families for which
            is_singleton_without_parents() is true are dropped before
            splitting.
    
    Returns:
        The number of files that the cohort has been split across (which will
        now be the number of jobs to run). Note that this is 1 even when no
        families remain, in which case no file is written.
    """
    
    families = load_families(ped_path)
    
    if not use_singletons_with_parents:
        families = [ x for x in families if not is_singleton_without_parents(x) ]
    
    # figure out how many families to include per file, in order to make the
    # correct number of jobs
    max_families = float(len(families))/float(number_of_jobs)
    
    files_n = 1
    families_n = 0
    output_file = None
    try:
        for family in families:
            families_n += 1
            
            if families_n > max_families:
                files_n += 1
                families_n = 1
            
            if families_n == 1:
                # starting a new file: release the previous handle first so
                # that its buffered lines are flushed to disk
                if output_file is not None:
                    output_file.close()
                output_file = open(os.path.join(tempdir, "{}.ped".format(files_n)), "w")
            
            for person in family:
                # iterating a family can yield None entries; skip those
                if person is None:
                    continue
                
                line = '{}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(person.family_id,
                    person.get_id(), person.dad_id, person.mom_id,
                    person.get_gender(), person.get_affected_status(),
                    person.get_path())
                output_file.write(line)
    finally:
        # always close the last file, even if writing failed part-way
        if output_file is not None:
            output_file.close()
    
    return files_n
コード例 #5
0
 def load_trio_paths(self):
     """Populate self.families from a ped file or from single-trio options.
     
     When self.options.ped is given, the families are loaded from that file.
     Otherwise a single family named "blank_family_ID" is built from the
     child/mother/father options and stored in a dict keyed by its family ID.
     """
     opts = self.options
     
     # a ped file takes precedence over the individually supplied paths
     if opts.ped is not None:
         self.families = ped.load_families(opts.ped)
         return
     
     trio = ped.Family("blank_family_ID")
     trio.add_child("child", opts.child, "2", opts.gender)
     
     # parents are optional; only add the ones that were supplied
     if opts.mother is not None:
         trio.add_mother("mother", opts.mother, opts.mom_aff, "2")
     if opts.father is not None:
         trio.add_father("father", opts.father, opts.dad_aff, "1")
     
     self.families = {trio.family_id: trio}
コード例 #6
0
    def load_trio_paths(self):
        """Populate self.families from a ped file or from single-trio options.

        When self.options.ped is given, the families are loaded from that
        file. Otherwise a single family named "blank_family_ID" is built from
        the child/mother/father options and stored in a dict keyed by its
        family ID.
        """
        opts = self.options

        # a ped file takes precedence over the individually supplied paths
        if opts.ped is not None:
            self.families = ped.load_families(opts.ped)
            return

        trio = ped.Family("blank_family_ID")
        trio.add_child("child", opts.child, "2", opts.gender)

        # parents are optional; only add the ones that were supplied
        if opts.mother is not None:
            trio.add_mother("mother", opts.mother, opts.mom_aff, "2")
        if opts.father is not None:
            trio.add_father("father", opts.father, opts.dad_aff, "1")

        self.families = {trio.family_id: trio}
コード例 #7
0
def get_families(args):
    """ returns a list of Family objects, from a ped file or a single trio
    
    Args:
        args: options object providing ped, child, mother, father, gender,
            mom_aff and dad_aff attributes.
    
    Returns:
        The result of load_families(args.ped) when a ped file is given,
        otherwise a one-element list holding a family built from the
        individually supplied options.
    """
    
    # a ped file takes precedence over the individually supplied options
    if args.ped is not None:
        return load_families(args.ped)
    
    trio = Family('blank_family_ID')
    # NOTE(review): args.mother/args.father fill the child's parent slots
    # here and are also passed as the last argument for each parent below -
    # confirm this matches what Family.add_child expects.
    trio.add_child('child', args.mother, args.father, args.gender, '2', args.child)
    
    # parents are optional; only add the ones that were supplied
    if args.mother is not None:
        trio.add_mother('mother', '0', '0', '2',  args.mom_aff, args.mother)
    if args.father is not None:
        trio.add_father('father',  '0', '0', '1', args.dad_aff, args.father)
    
    return [trio]
コード例 #8
0
def load_ped(ped_path, proband_id):
    """ loads the pedigree lines for the family containing a proband
    
    Args:
        ped_path: path to pedigree file for cohort
        proband_id: person ID of the proband of interest
    
    Returns:
        List of tab-separated, newline-terminated ped lines, one for each
        non-None member of the first family that has a child with the given
        ID.
    
    Raises:
        IndexError: if no family has a child with the given ID.
    """

    # collect every family that has a child with the requested ID
    matches = []
    for candidate in load_families(ped_path):
        for child in candidate.children:
            if child.get_id() == proband_id:
                matches.append(candidate)
    family = matches[0]

    def to_line(member):
        """ formats one family member as a ped file line """
        return '{}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(
            member.family_id, member.get_id(), member.dad_id, member.mom_id,
            member.get_gender(), member.get_affected_status(),
            member.get_path())

    # iterating a family can yield None entries; skip those
    lines = []
    for member in family:
        if member is not None:
            lines.append(to_line(member))

    return lines
コード例 #9
0
def get_families(args):
    """ returns a list of Family objects, from a ped file or a single trio

    Args:
        args: options object providing ped, child, mother, father, gender,
            mom_aff and dad_aff attributes.

    Returns:
        The result of load_families(args.ped) when a ped file is given,
        otherwise a one-element list holding a family built from the
        individually supplied options.
    """

    # a ped file takes precedence over the individually supplied options
    if args.ped is not None:
        return load_families(args.ped)

    trio = Family('blank_family_ID')
    # NOTE(review): args.mother/args.father fill the child's parent slots
    # here and are also passed as the last argument for each parent below -
    # confirm this matches what Family.add_child expects.
    trio.add_child(
        'child', args.mother, args.father, args.gender, '2', args.child)

    # parents are optional; only add the ones that were supplied
    if args.mother is not None:
        trio.add_mother(
            'mother', '0', '0', '2', args.mom_aff, args.mother)
    if args.father is not None:
        trio.add_father(
            'father', '0', '0', '1', args.dad_aff, args.father)

    return [trio]