if line.startswith('chrM') or line.startswith('chr25') or line.startswith('chrY') or line.startswith('chr24'): continue elif line.startswith('chrX') or line.startswith('chr23'): thisMarker = ChrXMarker() else: #TODO: more thorough check for validity of data format, like chr numbers?? thisMarker = AutosomalMarker() #get vcf columns vcfValues = line.strip().split('\t') #assert that all genotype values are numeric #TODO: verify that this assert works assert(all(v.isdigit() or v=="NA" for v in vcfValues[1:])) #1st column is variant id, 2nd onwards are sample genotypes #set this marker object's sample values thisMarker.markerID = vcfValues[0] thisMarker.getSampleGenotypes(vcfValues[1:],pedMemberIndices) #Note: thought of checking if chrX marker has heterozygous males, but it's not possible since the feature matrix comes in with encoded genotypes. #So all you can check is whether autosomal chrs have any genotypes other than 0/1/2/NA and chrX has any genotypes other that 0/1/NA for males, 0/1/2/NA for females #for chrX #TODO: verify that hasValidGenotypes() works if isinstance(thisMarker,ChrXMarker) and not thisMarker.hasValidGenotypes(pedNBGender,pedMemberType): print 'Invalid genotype found at ',thisMarker.markerID,'. This marker will not be tested.' continue #for autosomal chromosomes elif not isinstance(thisMarker,ChrXMarker) and not thisMarker.hasValidGenotypes(): print 'Invalid genotype found at ',thisMarker.markerID,'. This marker will not be tested.' continue #COMPUTE ALLELE FREQUENCY