def check_feature_limits(self):
    """Check the gene's coordinates against its scaffold and its mRNA children.

    A ``WAError`` is appended to ``self.wa_errors`` when:

    * the gene start or the gene end is greater than ``self.scaffold_size``;
    * an mRNA child lies on a different strand than the gene (reported only
      once, with the strand of the first mismatching mRNA as ``expected``);
    * the gene starts before the smallest mRNA start, or ends after the
      largest mRNA end (the mRNA-derived bound is given as ``expected``).

    Children whose type is not ``mRNA`` are ignored here.
    """
    gene_location = self.f.location

    if gene_location.start > self.scaffold_size:
        self.wa_errors.append(WAError(WAError.OUTSIDE_SCAFFOLD_START, self))

    if gene_location.end > self.scaffold_size:
        self.wa_errors.append(WAError(WAError.OUTSIDE_SCAFFOLD_END, self))

    fmin = None
    fmax = None
    buggy_strand = False

    for child in self.f.sub_features:
        if child.type != "mRNA":
            continue

        # Compare against None explicitly: 0 is a valid coordinate, and a
        # truthiness test would treat it as "not set yet" and overwrite it
        # with a larger start from a later mRNA.
        if fmin is None or child.location.start < fmin:
            fmin = child.location.start

        if fmax is None or child.location.end > fmax:
            fmax = child.location.end

        if not buggy_strand and child.location.strand != gene_location.strand:
            self.wa_errors.append(
                WAError(WAError.WRONG_GENE_STRAND, self,
                        {'expected': child.location.strand}))
            buggy_strand = True  # Don't report for each mrna

    if fmin is not None and fmin > gene_location.start:
        self.wa_errors.append(
            WAError(WAError.WRONG_GENE_START, self, {'expected': fmin}))

    if fmax is not None and fmax < gene_location.end:
        self.wa_errors.append(
            WAError(WAError.WRONG_GENE_END, self, {'expected': fmax}))
def check_sub_features_mrna(self):
    """Flag every direct child of the gene whose type is not ``mRNA``.

    One ``WAError.UNEXPECTED_SUB_FEATURE`` is appended to ``self.wa_errors``
    per offending child, carrying the child's first ``ID`` qualifier and its
    type.
    """
    for sub_feature in self.f.sub_features:
        if sub_feature.type == "mRNA":
            continue

        details = {
            'child_id': sub_feature.qualifiers['ID'][0],
            'child_type': sub_feature.type,
        }
        self.wa_errors.append(
            WAError(WAError.UNEXPECTED_SUB_FEATURE, self, details))
def check_sub_features(self):
    """Check genes that carry more than one direct sub-feature.

    Nothing is done when the gene has zero or one child.  Otherwise the
    children are compared with each other (two children whose first ``ID``
    qualifier is equal are not compared):

    * if at least one pair of children overlaps,
      ``self.check_multiple_mrnas()`` is called;
    * if no pair overlaps, a ``WAError.MULTIPLE_SUB_FEATURE`` carrying the
      number of children is appended to ``self.wa_errors``.
    """
    children = self.f.sub_features
    if len(children) <= 1:
        return

    def spans_overlap(loc1, loc2):
        # Either end of one span falls inside the other span (bounds
        # inclusive on both sides).
        return (loc1.start <= loc2.start <= loc1.end
                or loc1.start <= loc2.end <= loc1.end
                or loc2.start <= loc1.start <= loc2.end
                or loc2.start <= loc1.end <= loc2.end)

    # Siblings are compared with siblings.  Walking a child's own
    # sub-features instead would compare it with its descendants, which lie
    # inside it by construction and say nothing about whether two children
    # of the gene overlap.
    found_overlap = any(
        spans_overlap(first.location, second.location)
        for first in children
        for second in children
        if first.qualifiers['ID'][0] != second.qualifiers['ID'][0]
    )

    if found_overlap:
        self.check_multiple_mrnas()
    else:
        self.wa_errors.append(
            WAError(WAError.MULTIPLE_SUB_FEATURE, self,
                    {'num_children': len(children)}))
def validate_genes(self):
    """Parse ``self.in_file`` with ``GFF.parse`` and register its features.

    Every feature in ``rec.features`` of every parsed record is handled as
    follows:

    * if its first ``status`` qualifier equals "deleted" (case-insensitive),
      it is wrapped in a ``Gene`` and stored in ``self.all_genes`` with no
      further processing, whatever its type;
    * otherwise, if its type is ``gene``, it is wrapped in a ``Gene``, stored
      in ``self.all_genes``, counted in ``self.genes_with_goid``,
      ``self.groups_stats`` and ``self.genes_seen_once`` where applicable,
      its ``wa_errors`` are added to ``self.wa_errors``, and it is compared
      with previously seen genes sharing the same part / allele, which may
      append a ``GeneError`` to ``gene.errors``;
    * otherwise a ``WAError.UNEXPECTED_FEATURE`` is appended to
      ``self.wa_errors``.
    """
    # The context manager guarantees the file is closed even when parsing or
    # one of the per-feature steps raises.
    with open(self.in_file) as in_handle:
        for rec in GFF.parse(in_handle):
            for f in rec.features:
                status = f.qualifiers.get('status')
                is_deleted = bool(status) and status[0].lower() == "deleted"

                if is_deleted:
                    # Deleted features are only recorded, not validated.
                    gene = Gene(f, rec.id, self.scaf_lengths[rec.id],
                                self.allowed_groups, self.group_tags,
                                self.no_group, self.split_users)
                    self.all_genes[gene.wa_id] = gene
                    continue

                if f.type != "gene":
                    fake_gene = Gene(f, rec.id, self.scaf_lengths[rec.id],
                                     self.allowed_groups, self.group_tags)
                    self.wa_errors.append(
                        WAError(WAError.UNEXPECTED_FEATURE, fake_gene))
                    continue

                gene = Gene(f, rec.id, self.scaf_lengths[rec.id],
                            self.allowed_groups, self.group_tags,
                            self.no_group, self.split_users)
                self.all_genes[gene.wa_id] = gene

                # Count number of genes with goid
                if gene.has_goid:
                    self.genes_with_goid += 1

                # Collect stats on groups
                for g in gene.groups:
                    self.groups_stats[g] = self.groups_stats.get(g, 0) + 1

                new_part = gene.part
                new_allele = gene.allele

                # Collect wa_errors
                self.wa_errors.extend(gene.wa_errors)

                if not new_part and not new_allele:
                    self.genes_seen_once += 1

                # Keep track of split genes: parts are grouped by display id,
                # and additionally by allele when the gene has one.
                if new_part:
                    part_gene_key = gene.display_id
                    if new_allele:
                        part_gene_key = gene.display_id + ", allele " + new_allele

                    known_parts = self.splitted_genes.setdefault(part_gene_key, {})
                    if new_part in known_parts:
                        identical = known_parts[new_part]
                        gene.errors.append(
                            GeneError(
                                GeneError.PART_SAME, gene, {
                                    'other_name': identical.display_id,
                                    'other_scaff': identical.scaffold,
                                    'other_start': identical.f.location.start,
                                    'other_end': identical.f.location.end
                                }))
                    # The latest gene seen for this part replaces the previous one.
                    known_parts[new_part] = gene

                # Keep track of duplicated genes (alleles of one display id).
                if new_allele:
                    known_alleles = self.duplicated_genes.setdefault(
                        gene.display_id, {})
                    if new_allele in known_alleles:
                        identical = known_alleles[new_allele]
                        # Same allele is only an error when it is also the
                        # same part (or both have no part).
                        if identical.part == new_part:
                            gene.errors.append(
                                GeneError(
                                    GeneError.ALLELE_SAME, gene, {
                                        'other_name': identical.display_id,
                                        'other_scaff': identical.scaffold,
                                        'other_start': identical.f.location.start,
                                        'other_end': identical.f.location.end
                                    }))
                    known_alleles[new_allele] = gene