Beispiel #1
0
 def get_genetic_test(cols, gene):
     """Build the GeneticTest for ``gene`` from the columns of one pedigree record.

     The gene's column is located with ``CanRiskPedigree.get_column_idx`` and its value is
     split on ':'; the first two fields are passed positionally to ``GeneticTest``.

     @param cols: column values of a single pedigree file line.
     @param gene: gene name used to look up the column index.
     @return: GeneticTest built from the first two ':'-separated fields.
     @raise PedigreeError: if the column lookup returns a negative index.
     """
     idx = CanRiskPedigree.get_column_idx(gene)
     if idx < 0:
         # Close the quote around the gene name and separate it from the rest
         # of the sentence so the message reads "... for 'GENE' not found."
         raise PedigreeError("Genetic test column for '" + gene +
                             "' not found.")
     gt = cols[idx].split(':')
     return GeneticTest(gt[0], gt[1])
Beispiel #2
0
 def get_genetic_test(cols, gene):
     """Build the GeneticTest for ``gene`` from the columns of one pedigree record.

     NOTE: ``file_type`` is not a parameter of this function; it is read from
     the enclosing scope.

     @param cols: column values of a single pedigree file line.
     @param gene: gene name used to look up the column index.
     @return: GeneticTest built from the first two ':'-separated fields of the
              gene's column, or a default ``GeneticTest()`` when the BARD1
              column is absent and the file type is 'canrisk1'.
     @raise PedigreeError: if the column is missing in any other case.
     """
     idx = CanRiskPedigree.get_column_idx(gene, file_type)
     if idx < 0:
         # A missing BARD1 column is tolerated for 'canrisk1' files only
         if gene == "BARD1" and file_type == "canrisk1":
             return GeneticTest()
         # Close the quote around the gene name and separate it from the rest
         # of the sentence so the message reads "... for 'GENE' not found."
         raise PedigreeError("Genetic test column for '" + gene +
                             "' not found.")
     gt = cols[idx].split(':')
     return GeneticTest(gt[0], gt[1])
Beispiel #3
0
    def validate(self):
        """ Run the pedigree-level consistency checks, stopping at the first failure.

        The checks are made in this order: family ID format, connectivity of the family
        members to the target, the target's year of birth and age, whether carrier
        probabilities or cancer risks can be computed, and the monozygotic (MZ) twin data.
        @raise PedigreeError: describing the first check that failed.
        """
        famid = self.famid
        famid_max = settings.MAX_LENGTH_PEDIGREE_NUMBER_STR
        # An acceptable ID is short enough, alphanumeric plus hyphen,
        # but not just hyphens and not just zeros
        famid_ok = (len(famid) <= famid_max
                    and REGEX_ALPHANUM_HYPHENS.match(famid)
                    and not REGEX_ONLY_HYPHENS.match(famid)
                    and not REGEX_ONLY_ZEROS.match(famid))
        if not famid_ok:
            msg = ("Family ID (1st data column) has been set to '" + famid +
                   "'. Family IDs must be specified with between 1 and " +
                   str(famid_max) +
                   " non-zero number or alphanumeric characters.")
            raise PedigreeError(msg)

        stray_members = self.unconnected()
        if len(stray_members) > 0:
            msg = ("Pedigree (" + famid +
                   ") family members are not physically " +
                   "connected to the target: " + str(stray_members))
            raise PedigreeError(msg)

        # The index individual needs a usable year of birth and age
        index = self.get_target()
        if index.yob == '0':
            msg = ("The target's year of birth has been set to '" + index.yob +
                   "'. This person must be assigned a valid year of birth.")
            raise PedigreeError(msg)
        if index.age == '0':
            msg = ("The target's age has been set to '" +
                   index.age +
                   "'. This person must be assigned an age.")
            raise PedigreeError(msg)

        # At least one of the two calculations has to be possible
        probs_viable = self.is_carrier_probs_viable(target=index)
        risks_viable = self.is_risks_calc_viable(target=index)
        if not (probs_viable or risks_viable):
            msg = ("BOADICEA cannot compute mutation carrier probabilities because the target '"
                   + index.pid +
                   "' has a positive genetic test. Also BOADICEA cannot compute breast and ovarian cancer "
                   "risks because the target is: (1) over " +
                   str(settings.MAX_AGE_FOR_RISK_CALCS) +
                   " years old or (2) male, or (3) an affected female who has developed contralateral "
                   "breast cancer, ovarian cancer or pancreatic cancer.")
            raise PedigreeError(msg)

        # Monozygotic (MZ) twin data, grouped by twin identifier
        mz_groups = self.get_twins()
        for twin_char, members in mz_groups.items():
            # Exactly two individuals per identifier: no identical triplets etc
            if len(members) != 2:
                msg = ("MZ twin identifier '" + str(members[0].pid) +
                       "' does not appear twice in the pedigree file. "
                       "Only MZ twins are permitted in the pedigree, MZ triplets or quads are not allowed.")
                raise PedigreeError(msg)

            # The identifier itself must be one of the permitted characters
            if len(twin_char) != 1 or twin_char not in settings.UNIQUE_TWIN_IDS:
                msg = ("Invalid MZ twin character '" + twin_char +
                       "'. MZ twins must be identified using one " +
                       "of the following ASCII characters: " +
                       str(settings.UNIQUE_TWIN_IDS) + ".")
                raise PedigreeError(msg)

            first, second = members[0], members[1]
            # Shared opening of the per-pair consistency messages below
            mz_prefix = "Monozygotic (MZ) twins identified with the character '" + twin_char

            same_mother = first.mothid == second.mothid
            if not same_mother or first.fathid != second.fathid:
                raise PedigreeError(
                    mz_prefix + "' have different "
                    "parents. MZ twins must have the same parents.")

            if first.yob != second.yob:
                raise PedigreeError(
                    mz_prefix + "' have different "
                    "years of birth. MZ twins must have the same year of birth.")

            # Ages are only compared when both twins are recorded as alive
            both_alive = first.dead == '0' and second.dead == '0'
            if both_alive and first.age != second.age:
                raise PedigreeError(
                    mz_prefix + "' have different "
                    "ages. If both MZ twins are alive, they must have the same age at last follow up.")

            if first.sex() != second.sex():
                raise PedigreeError(
                    mz_prefix + "' have a different "
                    "sex. MZ twins must have the same sex.")

            # Genetic test results of the pair have to agree
            if not GeneticTest.compareTestResults(first, second):
                raise PedigreeError(
                    "Monozygotic (MZ) twins have both had a genetic test, but the genetic test results "
                    "for these individuals are different. Under these circumstances, the genetic test "
                    "results must be the same.")

        # Finally, cap the number of MZ twin pairs in a single pedigree
        if len(mz_groups) > settings.MAX_NUMBER_MZ_TWIN_PAIRS:
            msg = ("Maximum number of MZ twin pairs has been exceeded. Input pedigrees must have a "
                   "maximum of " + str(settings.MAX_NUMBER_MZ_TWIN_PAIRS) +
                   " MZ twin pairs.")
            raise PedigreeError(msg)
Beispiel #4
0
    def __init__(self,
                 pedigree_records=None,
                 people=None,
                 file_type=None,
                 bc_risk_factor_code=None,
                 oc_risk_factor_code=None,
                 bc_prs=None,
                 oc_prs=None):
        """
        Build a pedigree from raw pedigree file records and/or already constructed people.

        @keyword pedigree_records: the pedigree records section of the BOADICEA import pedigree file.
        @keyword people: members of the pedigree.
        @keyword file_type: file type is 'bwa' or 'canrisk'.
        @keyword bc_risk_factor_code: breast cancer risk factor code
        @keyword oc_risk_factor_code: ovarian cancer risk factor code
        @keyword bc_prs: breast cancer PRS
        @keyword oc_prs: ovarian cancer PRS
        @raise PedigreeError: if no family member is supplied, a Target column value is not
            '0' or '1', an individual ID is repeated in the records, the pedigree does not
            contain exactly one target, or the pedigree size is outside the configured limits.
        """
        self.people = []
        if pedigree_records is not None and len(pedigree_records) > 0:
            # The family ID is the first whitespace-separated field of the first record
            self.famid = pedigree_records[0].split()[0]
            seen_ids = set()  # constant-time duplicate detection
            for record in pedigree_records:
                p = Person.factory(record, file_type=file_type)
                if p.target not in ('0', '1'):
                    raise PedigreeError(
                        "A value in the Target data column has been set to '" +
                        p.target +
                        "'. Target column parameters must be set to '0' or '1'."
                    )
                if p.is_target():
                    self.target = p

                if p.pid in seen_ids:
                    raise PedigreeError(
                        "Individual ID '" + p.pid +
                        "' appears more than once in the pedigree file.")
                seen_ids.add(p.pid)
                self.people.append(p)
        if people is not None:
            self.people.extend(people)

        if not self.people:
            # Without this guard the checks below fail with an IndexError or
            # AttributeError (no family ID to report) instead of a PedigreeError.
            raise PedigreeError("Pedigree has no family members.")

        if people is not None:
            # When people are supplied, the family ID is taken from the first member
            self.famid = self.people[0].famid

        ntarget = sum(1 for person in self.people if person.is_target())

        pedigree_size = len(self.people)
        if ntarget != 1:
            raise PedigreeError(
                "Pedigree (" + self.famid +
                ") has either no index or more than 1 " +
                "index individuals. Only one target can be specified.")
        if pedigree_size > settings.MAX_PEDIGREE_SIZE or pedigree_size < settings.MIN_BASELINE_PEDIGREE_SIZE:
            raise PedigreeError("Pedigree (" + self.famid +
                                ") has unexpected number of family members " +
                                str(pedigree_size))
        if file_type == 'canrisk':
            # Risk factor codes and PRS are only stored for 'canrisk' files, and
            # each attribute is only created when a value was actually supplied
            if bc_risk_factor_code is not None:
                self.bc_risk_factor_code = bc_risk_factor_code
            if oc_risk_factor_code is not None:
                self.oc_risk_factor_code = oc_risk_factor_code
            if bc_prs is not None:
                self.bc_prs = bc_prs
            if oc_prs is not None:
                self.oc_prs = oc_prs
Beispiel #5
0
    def factory(ped_file_line, file_type=None):
        ''' Factory method for creating types of people given a record from
        a BOADICEA import pedigree file.

        The record is split on whitespace. Genetic tests and pathology results
        are read using the BWA column layout when file_type is 'bwa', and the
        CanRisk layout (':'-separated values within a column) otherwise.
        @type  ped_file_line: str
        @param ped_file_line: Pedigree file line.
        @keyword file_type: 'bwa' selects the BWA layout; any other value is
            handled as a CanRisk record.
        @return: a Male or Female instance, chosen by the sex column (cols[6]).
        @raise PedigreeError: if a CanRisk genetic test column cannot be found
            or the sex column is neither 'M' nor 'F'.
        '''
        cols = ped_file_line.split()

        famid = cols[0]
        name = cols[1]
        pid = cols[3]

        def cancer_from(col_idx):
            # A column value of "0" is handed to Cancer as "-1"
            value = cols[col_idx]
            return Cancer(value if value != "0" else "-1")

        cancers = Cancers(bc1=cancer_from(11),
                          bc2=cancer_from(12),
                          oc=cancer_from(13),
                          prc=cancer_from(14),
                          pac=cancer_from(15))

        # use column headers to get gene test type and result
        if file_type == 'bwa':
            gtests = BWSGeneticTests.factory([
                GeneticTest(cols[BwaPedigree.get_column_idx(gene + 't')],
                            cols[BwaPedigree.get_column_idx(gene + 'r')])
                for gene in settings.BC_MODEL['GENES']
            ])
            pathology = PathologyTests(
                er=PathologyTest(PathologyTest.ESTROGEN_RECEPTOR_TEST,
                                 cols[27]),
                pr=PathologyTest(PathologyTest.PROGESTROGEN_RECEPTOR_TEST,
                                 cols[28]),
                her2=PathologyTest(PathologyTest.HER2_TEST, cols[29]),
                ck14=PathologyTest(PathologyTest.CK14_TEST, cols[30]),
                ck56=PathologyTest(PathologyTest.CK56_TEST, cols[31]))
        else:
            # NOTE(review): presumably the first two OC model genes are already
            # part of the BC model gene list - confirm against settings.
            genes = settings.BC_MODEL['GENES'] + settings.OC_MODEL['GENES'][2:]

            def get_genetic_test(cols, gene):
                idx = CanRiskPedigree.get_column_idx(gene)
                if idx < 0:
                    # Quote the gene name properly: "... for 'GENE' not found."
                    raise PedigreeError("Genetic test column for '" + gene +
                                        "' not found.")
                gt = cols[idx].split(':')
                return GeneticTest(gt[0], gt[1])

            gtests = CanRiskGeneticTests.factory(
                [get_genetic_test(cols, gene) for gene in genes])

            # The column at the last CanRiskPedigree.COLUMNS position carries
            # the pathology results as ER:PR:HER2:CK14:CK56
            path = cols[len(CanRiskPedigree.COLUMNS) - 1].split(':')
            pathology = PathologyTests(
                er=PathologyTest(PathologyTest.ESTROGEN_RECEPTOR_TEST,
                                 path[0]),
                pr=PathologyTest(PathologyTest.PROGESTROGEN_RECEPTOR_TEST,
                                 path[1]),
                her2=PathologyTest(PathologyTest.HER2_TEST, path[2]),
                ck14=PathologyTest(PathologyTest.CK14_TEST, path[3]),
                ck56=PathologyTest(PathologyTest.CK56_TEST, path[4]))

        # Both person types take identical arguments, so only the class varies
        sex = cols[6]
        if sex == 'M':
            person_cls = Male
        elif sex == 'F':
            person_cls = Female
        else:
            raise PedigreeError(
                "The sex of family member '" + name + "' is invalid. An " +
                "individuals sex must be specified as 'M' or 'F' only.")

        return person_cls(famid,
                          name,
                          pid,
                          fathid=cols[4],
                          mothid=cols[5],
                          target=cols[2],
                          dead=cols[8],
                          age=cols[9],
                          yob=cols[10],
                          ashkn=cols[16],
                          cancers=cancers,
                          mztwin=cols[7],
                          gtests=gtests,
                          pathology=pathology)