def test_tuple_proportion(self):
    """Check ``_tuple_proportion`` against hand-computed values on 'Chr2'.

    The expected 4-tuples match the (A, T, G, C) fractions of the
    sequence shown below over the half-open windows [0, 10) and [5, 15).
    """
    # Sequence under test with a 0-based position ruler (ones digit only);
    # presumably the Chr2 record of the test FASTA -- confirm against fixture.
    #   TATTAGGCTGTGATGTGCTT
    #   01234567890123456789
    intervals = reader.gff3_interval(self.gff3_1, fasta=self.fa)
    # (interval tuple passed in, expected proportions). The meaning of the
    # third and fourth interval fields is not visible from this test.
    cases = (
        ((0, 10, 0, 0), (0.2, 0.4, 0.3, 0.1)),
        ((5, 15, 1, 1), (0.1, 0.3, 0.5, 0.1)),
    )
    for interval, expected in cases:
        self.assertEqual(intervals._tuple_proportion('Chr2', interval), expected)
def test_gff3_12_fa_region_2_gene(self):
    """Check ``calc_intersect_2`` for genes on 'Chr1' at a 99 percent threshold.

    Loads the first GFF3 with a FASTA reference, adds the second GFF3
    under the name 'treat', and expects the control/treat gene
    comparison to return ``(1, 1, 1)``.
    """
    GI = reader.gff3_interval(self.gff3_1, fasta=self.fa)
    GI.add_gff3(self.gff3_2, 'treat')
    ret = GI.calc_intersect_2(
        'Chr1', 'control', 'treat', GI.element_dict['gene'], 1, p=99)
    # assertEqual, not assertEquals: the latter is a deprecated alias that
    # was removed in Python 3.12, and the sibling tests already use this form.
    self.assertEqual(ret, (1, 1, 1))
def test_gff3_1_fa(self):
    """Check a single GFF3 loaded together with a FASTA reference.

    Verifies the element sets, the default annotation name, the
    per-chromosome maxima (expected to equal ``self.fa_max``), and the
    shape and sum of the three arrays produced for every element type
    on 'Chr1'.
    """
    intervals = reader.gff3_interval(self.gff3_1, fasta=self.fa)
    self._test_elem_sets(intervals)
    self.assertEqual(intervals.gff3_names, ['control'])
    for chrom in ('Chr1', 'Chr2'):
        self.assertEqual(intervals._get_max(chrom), self.fa_max[chrom])
    # Check sums
    # One annotation loaded, so each array is expected to have a single row.
    expected_shape = (1, self.fa_max['Chr1'])
    for elem in intervals.element_dict.keys():
        first, second, third = self._gen_arrays(intervals, 'Chr1', elem, 1)
        for idx, arr in enumerate((first, second, third)):
            self.assertEqual(arr.shape, expected_shape)
            self.assertEqual(np.sum(arr), self.elems_sum1[elem][idx])
def main():
    """Command-line entry point for comparing GFF3 annotations.

    Parses the command line, configures the module logger, builds a
    ``reader.gff3_interval`` from the control GFF3 (plus optional FASTA
    reference), adds every treatment GFF3 under its given name, and then
    runs the summaries: ``summaries.tabular`` only when a reference was
    supplied, ``summaries.tabular_region`` always.

    Raises:
        ValueError: if the number of ``--names`` does not match the
            number of ``--treat`` files.
    """
    fCheck = fileCheck()  # class for checking parameters
    parser = argparse.ArgumentParser(
        description="A tool for comparing GFF3 annotations")
    parser.add_argument(
        '-C', '--control', metavar='GFF3',
        help='Control GFF3. All comparisons are relative to this annotation.',
        required=True, type=fCheck.gff3)
    parser.add_argument(
        '-R', '--reference', metavar='FASTA',
        help='Control reference (required for base pair metrics)',
        type=fCheck.fasta)
    parser.add_argument(
        '--cname', metavar='STR', help='Name of control GFF3',
        default='control', type=str)
    parser.add_argument(
        '-T', '--treat', metavar='GFF3',
        help='Space separated list of GFF3 files for comparison against the control',
        type=fCheck.gff3, required=True, nargs='+')
    parser.add_argument(
        '-N', '--names', metavar='STR',
        help='Space separated list of names for treatment GFF3 files (name order must match file order)',
        type=str, required=True, nargs='+')
    parser.add_argument(
        '-p', '--percent', metavar='INT',
        help='Reciprocal percent overlap threshold [%(default)s]',
        type=int, default=90)
    parser.add_argument(
        '--plot', action="store_true", help="Plot venn diagrams of results")
    parser.add_argument(
        '-e', '--ext', metavar='EXT',
        help='Figure extension [%(default)s]', default='png',
        type=argChecker(('pdf', 'png', 'eps'), 'figure extension').check)
    parser.add_argument(
        '-v', '--verbose', action="store_true", help='Enable verbose logging')
    parser.add_argument(
        '--temd', action="store_true", help='Analyze TE metadata')
    args = parser.parse_args()
    ################################
    # Configure logging
    ################################
    if args.verbose:
        logger.setLevel(logging.DEBUG)
        logger.debug("DEBUG logging enabled")
    else:
        logger.setLevel(logging.INFO)
    ################################
    # Check arguments
    ################################
    if len(args.names) != len(args.treat):
        # Build the message once so the log line and the exception agree;
        # the exception previously carried no message at all.
        msg = "treat(%i) != names(%i)" % (len(args.treat), len(args.names))
        logger.error(msg)
        raise ValueError(msg)
    ################################
    # Create GFF3 intervals
    ################################
    GI = reader.gff3_interval(args.control, name=args.cname, fasta=args.reference)
    for f, n in zip(args.treat, args.names):
        GI.add_gff3(f, n)
    ################################
    # Generate results
    ################################
    # fig_ext is False unless --plot was given; the summaries receive either
    # the chosen extension or False.
    fig_ext = args.ext if args.plot else False
    if args.reference:
        # Base-pair metrics are only run when a reference FASTA is supplied.
        logger.info("Basepair resolution results")
        summaries.tabular(GI, fig_ext=fig_ext, temd=args.temd)
    logger.info("Interval results")
    summaries.tabular_region(GI, p=args.percent, fig_ext=fig_ext, temd=args.temd)
    logger.info("Done")
def test_gff3_12_nofa(self):
    """Check two GFF3 annotations loaded without a FASTA reference.

    Verifies the element sets, both annotation names, the
    per-chromosome maxima (expected to equal ``self.gff3_max``), and,
    for every element type on 'Chr1', the shape of each generated array
    and the per-annotation row sums.
    """
    intervals = reader.gff3_interval(self.gff3_1)
    intervals.add_gff3(self.gff3_2, 'treat')
    self._test_elem_sets(intervals)
    self.assertEqual(intervals.gff3_names, ['control', 'treat'])
    for chrom in ('Chr1', 'Chr2'):
        self.assertEqual(intervals._get_max(chrom), self.gff3_max[chrom])
    # Check sums
    # Two annotations loaded, so each array is expected to have two rows.
    expected_shape = (2, self.gff3_max['Chr1'])
    for elem in intervals.element_dict.keys():
        first, second, third = self._gen_arrays(intervals, 'Chr1', elem, 1)
        for idx, arr in enumerate((first, second, third)):
            self.assertEqual(arr.shape, expected_shape)
            # Row 0 is checked against the first file's sums, row 1 against
            # the second file's.
            self.assertEqual(np.sum(arr[0, :]), self.elems_sum1[elem][idx])
            self.assertEqual(np.sum(arr[1, :]), self.elems_sum2[elem][idx])
def test_get_proportion_arrays(self):
    """Check ``get_proportion_arrays`` for 'exon' features on 'Chr2'.

    Each of the first four entries of the result is compared, order
    insensitively, with the expected per-feature base fractions for
    A, T, G and C in turn.
    """
    # Sequence under test with a 0-based position ruler (ones digit only).
    # The dash line came from the original comment; its column alignment was
    # lost, and it presumably marked two exons of 5 and 6 bp -- confirm
    # against the GFF3 fixture.
    #   TATTAGGCTGTGATGTGCTT
    #   01234567890123456789
    #   ----- ------
    intervals = reader.gff3_interval(self.gff3_1, fasta=self.fa)
    prop_array = intervals.get_proportion_arrays(
        'Chr2', 'control', 'exon', 1, strand=False)
    # One expected pair per base; denominators are the two feature lengths.
    expected_by_base = (
        [1.0 / 5, 1.0 / 6],  # A
        [2.0 / 5, 3.0 / 6],  # T
        [2.0 / 5, 2.0 / 6],  # G
        [0.0 / 5, 0.0 / 6],  # C
    )
    for row, expected in enumerate(expected_by_base):
        self.assertEqual(sorted(prop_array[row]), sorted(expected))