def test_cmp_vrt_iter_vrt2(self):
     """Diff of example1 against the gnomAD example yields all of example1.

     The number of variants produced by ``vs1.diff_vrt(vs2)`` must equal
     the number of variants produced by iterating ``vs1`` alone.
     """
     first_path = pkg_file('genomvar.test', 'data/example1.vcf')
     vs1 = VariantSetFromFile(first_path)
     second_path = pkg_file('genomvar.test', 'data/example_gnomad_1.vcf.gz')
     vs2 = VariantSetFromFile(second_path)

     all_variants = list(vs1.iter_vrt())
     only_in_first = list(vs1.diff_vrt(vs2).iter_vrt())
     self.assertEqual(len(only_in_first), len(all_variants))
Exemple #2
0
 def test_submission_with_bg(self):
     """Spam the submit form with 200 threaded enrichment requests.

     Every request carries the same pasted gene list and the same open
     background file.  Threads are started with a random pause of up to
     ten seconds between launches and are joined afterwards.  The final
     ``sleep(100000000)`` means the test does not return on its own.
     """
     with open(pkg_file('GOnet', 'data/tests/genelist3.csv'), 'r') as fh:
         input_lines = fh.read()
     bg_path = pkg_file('GOnet',
                        'data/tests/CD8_cells_background_TPM10.lst')
     # The handle is shared by all requests, so it stays open until every
     # worker thread has been joined and is closed right after.
     with open(bg_path, 'r') as bg_file:
         request_data = {
             'submit': ['Submit'],
             'paste_data': [input_lines],
             'bg_file': [bg_file],
             'namespace': ['biological_process'],
             'analysis_type': ['enrich'],
             'output_type': ['graph'],
             'csv_separator': [','],
             'qvalue': [0.0001]
         }
         threads = []
         seen = set()
         for n in range(200):
             # Rewind the shared handle before launching the next request.
             # NOTE(review): earlier threads may still be reading from it.
             bg_file.seek(0)
             t = Thread(target=c.post,
                        args=(urls.reverse('GOnet-submit-form'),
                              request_data))
             t.daemon = True
             t.start()
             threads.append(t)
             sleep(random.random() * 10)
             print('>!< Spamming task', len(threads))
             # Debug aid: local names that appeared since the last pass.
             print('new vars', set(vars().keys()).difference(seen))
             seen = set(vars().keys())
         for t in threads:
             t.join()
     print('deleting threads')
     del threads
     print('sleeping......................................')
     sleep(100000000)
Exemple #3
0
 def test_wrong_chrom_name_in_ref(self):
     """Find chr24 variants with a reference loaded from ``chr25.fasta``.

     Two variants are expected in ``chr24:1200-1210`` of the indexed
     example1 set.
     """
     ref = Reference(pkg_file(__name__, 'data/chr25.fasta'))
     try:
         vset = VariantSetFromFile(pkg_file('genomvar.test',
                                            'data/example1.vcf.gz'),
                                   reference=ref,
                                   index=True)
         found = list(vset.find_vrt(rgn='chr24:1200-1210'))
         self.assertEqual(len(found), 2)
     finally:
         # Release the reference even if construction or the assertion
         # above raises.
         ref.close()
 def test_diff_callback(self):
     """Check that the callback result is exposed as ``attrib['cmp']``.

     example3 is compared with itself; for every common variant the first
     element of ``attrib['cmp']`` must equal the variant's own VCF row.
     The index of the last variant yielded must be 7.
     """
     def rows_of(matches):
         """Return the VCF row of every variant in ``matches``."""
         return [v.attrib['vcf_notation']['row'] for v in matches]

     s1 = VariantSetFromFile(pkg_file('genomvar.test', 'data/example3.vcf'))
     s2 = VariantSetFromFile(pkg_file('genomvar.test', 'data/example3.vcf'))
     # Start below zero so that an empty comparison fails the final
     # assertion instead of raising NameError.
     last_index = -1
     common = s1.comm_vrt(s2).iter_vrt(callback=rows_of)
     for last_index, vrt in enumerate(common):
         self.assertEqual(vrt.attrib['vcf_notation']['row'],
                          vrt.attrib['cmp'][0])
     self.assertEqual(last_index, 7)
 def test_cmp_stream(self):
     """Variants of example1 absent from example2 add up to 14 units."""
     first = VariantSetFromFile(
         pkg_file('genomvar.test', 'data/example1.vcf'))
     second = VariantSetFromFile(
         pkg_file('genomvar.test', 'data/example2.vcf.gz'))
     only_in_first = first.diff_vrt(second).iter_vrt()
     total_units = sum(vrt.nof_unit_vrt() for vrt in only_in_first)
     self.assertEqual(total_units, 14)
Exemple #6
0
    def test_find_vrt(self):
        """Indexed file-backed and in-memory sets give equal unit counts.

        Compared both for a ``find_vrt('chr24')`` query and for a full
        ``iter_vrt()`` pass over example2.
        """
        def unit_total(variants):
            """Sum ``nof_unit_vrt()`` over an iterable of variants."""
            return sum(v.nof_unit_vrt() for v in variants)

        from_file = VariantSetFromFile(
            pkg_file('genomvar.test', 'data/example2.vcf.gz'), index=True)
        in_memory = VariantSet.from_vcf(
            pkg_file('genomvar.test', 'data/example2.vcf.gz'))

        self.assertEqual(unit_total(from_file.find_vrt('chr24')),
                         unit_total(in_memory.find_vrt('chr24')))
        self.assertEqual(unit_total(from_file.iter_vrt()),
                         unit_total(in_memory.iter_vrt()))
Exemple #7
0
class TestStreamCmp(TestCase):
    """Tests calling ``_cmp_vcf`` directly on VCF files."""
    f1 = pkg_file('genomvar.test','data/example1.vcf.gz')
    f2 = pkg_file('genomvar.test','data/example2.vcf.gz')

    def test_cmp_vcf_files(self):
        """Compare example1 with example2 and inspect counts and output.

        Checks the three counters returned by ``_cmp_vcf`` and the INFO
        annotations of the first and the last row written to ``out``.
        """
        def _get_info(info):
            """Parse a ``key=value;...`` INFO string; ``'.'`` gives ``{}``."""
            if info == '.':
                return {}
            pairs = (token.split('=', maxsplit=1)
                     for token in info.split(';'))
            return {key: value for key, value in pairs}

        out = io.StringIO()
        with warnings.catch_warnings(record=True):
            cnt = _cmp_vcf(self.f1, self.f2, out=out)
        self.assertEqual(cnt[0], 14)
        self.assertEqual(cnt[2], 4)
        self.assertEqual(cnt[1], 12)

        out.seek(0)
        noheader = itertools.dropwhile(lambda l: l.startswith('#'), out)
        rows = [VCFRow(*l.strip().split('\t')) for l in noheader]

        row0 = rows[0]
        info = _get_info(row0.INFO)
        self.assertEqual([row0.CHROM, row0.POS, row0.REF, row0.ALT],
                         ['chr23',7462,'G','T'])
        self.assertEqual(info['whichVCF'], 'second')
        self.assertEqual(info['ln'], '13')

        info = _get_info(rows[-1].INFO)
        self.assertEqual(info['ln'], '30')
        self.assertEqual(info['ln2'], '21')

    def test_unsorted_VCF_input(self):
        """``_cmp_vcf`` raises ``UnsortedVariantFileError`` on reversed input.

        The second input is a copy of example1 whose data lines are
        written in reverse order below the unchanged header.
        """
        header = []
        lines = []
        with open(pkg_file('genomvar.test','data/example1.vcf'),'rt') as fh:
            for line in fh:
                if line.startswith('#'):
                    header.append(line)
                else:
                    lines.append(line)
        # The context manager closes and removes the temporary file even
        # when the assertion below fails.
        with tempfile.NamedTemporaryFile(suffix='.vcf') as tf:
            with open(tf.name,'wt') as fh:
                fh.writelines(header)
                fh.writelines(reversed(lines))
            out = io.StringIO()
            with warnings.catch_warnings(record=True):
                with self.assertRaises(UnsortedVariantFileError):
                    _cmp_vcf(pkg_file('genomvar.test','data/example1.vcf'),
                             tf.name,out=out)
Exemple #8
0
 def test_GO_annotate_genelist14(self):
     """Submit genelist14 for custom-term annotation with organism mouse.

     No assertion is made on the response; the test only requires that
     the submission request completes without raising.
     """
     with open(pkg_file(__name__, 'data/genelist14.tsv'), 'r') as fh:
         input_lines = fh.read()
     with open(pkg_file(__name__, 'data/custom_annotation2.txt'),
               'r') as fh:
         custom_annotation = fh.read()
     req = dict(
         job_req, **{
             'paste_data': input_lines,
             'analysis_type': 'annot',
             'slim': 'custom',
             'custom_terms': custom_annotation,
             'organism': 'mouse'
         })
     c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
Exemple #9
0
    def test_GO_annotate_genelist2(self):
        """Annotate genelist2 with custom terms; check graph, CSV and TXT.

        After a successful submission the stored network is converted to
        a graph and checked for expected term-to-gene edges, then the CSV
        and TXT result views are checked for gene ``LTK`` under term
        ``GO:0032526``.
        """
        with open(pkg_file(__name__, 'data/genelist2.tsv'), 'r') as fh:
            input_lines = fh.read()
        with open(pkg_file(__name__, 'data/custom_annotation.txt'),
                  'r') as fh:
            custom_annotation = fh.read()
        req = dict(
            job_req, **{
                'paste_data': input_lines,
                'analysis_type': 'annot',
                'slim': 'custom',
                'custom_terms': custom_annotation
            })
        resp = c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
        self.assertEqual(resp.status_code, 200)

        sn = GOnetSubmission.objects.latest('submit_time')
        net_dict = json.loads(sn.network)
        G = cyjs.cyjs2nx(net_dict)
        self.assertListEqual(list(G.predecessors('P29376')), ['GO:0071300'])
        self.assertListEqual(list(G.predecessors('Q5TBA9')), ['GO:0016043'])
        self.assertListEqual(list(G.predecessors('P16403')), ['GO:0065003'])

        # Test node GO:0071300 (cellular response to retinoic acid)
        term_node = [node for node in net_dict['elements']['nodes']
                     if node['data']['id'] == 'GO:0071300'][0]
        self.assertEqual(term_node['data']['tot_gn'],
                         len(O.get_attr('GO:0071300', 'human')))

        # Test CSV response
        csv_resp = c.get(urls.reverse('GOnet-csv-res', args=(str(sn.id), )))
        res = pd.read_csv(io.StringIO(csv_resp.content.decode()),
                          sep=',', index_col=1)
        self.assertIn('LTK', res.loc['GO:0032526', 'Genes'])

        # Test TXT response: the first line starting with the term ID
        # must mention the gene, and such a line must exist.
        txt_resp = c.get(urls.reverse('GOnet-txt-res', args=(str(sn.id), )))
        line_found = False
        for line in io.StringIO(txt_resp.content.decode()):
            if line.strip().startswith('GO:0032526'):
                self.assertIn('LTK', line)
                line_found = True
                break
        self.assertTrue(line_found)
 def test_cmp_vrt_iter_vrt(self):
     """Common and differing variants of example1/example2 with samples.

     Expects 4 common variants, each with a truthy ``attrib['samples']``,
     and 12 variants present only in example1.
     """
     def load(relpath):
         """Open a test VCF with sample parsing enabled."""
         return VariantSetFromFile(pkg_file('genomvar.test', relpath),
                                   parse_samples=True)

     vs1 = load('data/example1.vcf.gz')
     vs2 = load('data/example2.vcf.gz')

     nof_common = 0
     for vrt in vs1.comm_vrt(vs2).iter_vrt():
         self.assertTrue(vrt.attrib['samples'],
                         msg='Vrt {} has no samples'.format(vrt))
         nof_common += 1
     self.assertEqual(nof_common, 4)

     only_in_first = list(vs1.diff_vrt(vs2).iter_vrt())
     self.assertEqual(len(only_in_first), 12)
 def test_cmp_vrt_region_multisample2(self):
     """Region-restricted comparison of two 1000 Genomes example files.

     Every common variant in the region must have an ``attrib``
     attribute, and a second pass over the same region must be non-empty.
     """
     def load(relpath):
         """Open an indexed test VCF with sample parsing enabled."""
         return VariantSetFromFile(pkg_file('genomvar.test', relpath),
                                   parse_samples=True,
                                   index=True)

     vs1 = load('data/example_1000genomes_1.vcf.gz')
     vs2 = load('data/example_1000genomes_2.vcf.gz')
     region = '7:152134922-152436005'

     for vrt in vs2.comm_vrt(vs1).region(rgn=region):
         self.assertTrue(hasattr(vrt, 'attrib'), msg='False for' + str(vrt))

     second_pass = list(vs2.comm_vrt(vs1).region(rgn=region))
     self.assertGreater(len(second_pass), 0)
 def test_cmp_vrt_region(self):
     """Common variants in ``chr24:10040-10050`` keep INFO and sample data.

     Two common variants are expected; the first must carry ``AF == 1.0``
     and genotype ``(0, 1)`` for sample ``SAMP1``.
     """
     shared_opts = dict(parse_info=True, index=True)
     vs1 = VariantSetFromFile(
         pkg_file('genomvar.test', 'data/example1.vcf.gz'),
         parse_samples=True,
         **shared_opts)
     vs2 = VariantSetFromFile(
         pkg_file('genomvar.test', 'data/example2.vcf.gz'),
         parse_samples='SAMP1',
         **shared_opts)

     common = list(vs1.comm_vrt(vs2).region(rgn='chr24:10040-10050'))
     self.assertEqual(len(common), 2)
     first, _second = common
     self.assertEqual(first.attrib['info']['AF'], 1.0)
     self.assertEqual(first.attrib['samples']['SAMP1']['GT'], (0, 1))
Exemple #13
0
    def test_asterisk_variant(self):
        """Three variants are found in ``chr1:995507-995515``.

        The set is read from ``example_with_asterisk.vcf.gz`` with INFO
        parsing enabled.
        """
        source = pkg_file('genomvar.test',
                          'data/example_with_asterisk.vcf.gz')
        vset = VariantSet.from_vcf(source, parse_info=True)

        found = list(vset.find_vrt('chr1', 995507, 995515))
        self.assertEqual(len(found), 3)
Exemple #14
0
 def test_init(self):
     """Check header length and FORMAT dtype reported by ``VCFReader``.

     example1.vcf must have a 15-line header and exactly one FORMAT
     field, ``GT``, whose dtype is a subclass of ``np.object_``.
     """
     reader = VCFReader(pkg_file('genomvar.test', 'data/example1.vcf'))
     self.assertEqual(reader.header_len, 15)
     format_dtype = reader._dtype['format']
     self.assertEqual(len(format_dtype), 1)
     gt_dtype = format_dtype['GT']['dtype']
     # Report the value that is actually checked; the message is built
     # eagerly, so it must only touch the key the assertion itself uses.
     self.assertTrue(issubclass(gt_dtype, np.object_),
                     msg='Got dtype ' + str(gt_dtype))
Exemple #15
0
    def test_minimal_VCF_definition_io(self):
        """Round-trip variants through a VCF reduced to a minimal header.

        Only the ``##fileformat`` line, the ``#CHROM`` line and the data
        lines of example1 are kept.  Each variant's ``GT`` string is
        replaced by ``(0, 1)`` or ``None`` before being written out, and
        the re-read variants must be edit-equal to the originals.
        """
        kept_headers = ('##fileformat', '#CHROM')
        source = io.StringIO()
        with open(pkg_file('genomvar.test', 'data/example1.vcf'), 'rt') as fh:
            for line in fh:
                is_data_line = not line.startswith('#')
                if line.startswith(kept_headers) or is_data_line:
                    source.write(line)
        source.seek(0)
        reader = VCFReader(source)

        written = io.StringIO()
        writer = VCFWriter(format_spec=[RESERVED_FORMAT.GT],
                           samples=reader.samples)
        originals = []
        for vrt in reader.iter_vrt(parse_samples=True):
            sample = vrt.attrib['samples']['SAMP1']
            self.assertTrue(isinstance(sample['GT'], str))
            # Swap the textual genotype for the value the writer is fed.
            sample['GT'] = (0, 1) if sample.get('GT') == '0/1' else None
            written.write(str(writer.get_row(vrt)))
            originals.append(vrt)
        originals.sort(key=lambda v: v.start)

        written.seek(0)
        reread = sorted(VCFReader(written).iter_vrt(),
                        key=lambda v: v.start)

        for before, after in zip(originals, reread):
            self.assertTrue(before.edit_equal(after))
Exemple #16
0
    def test_GO_annotate_genelist2_vs_enriched(self):
        """Annotating with the enriched terms reproduces the enrich graph.

        genelist2 is first submitted with the default request; the terms
        whose ``q`` is below the job's ``qvalue`` are then used as custom
        annotation terms.  Both resulting networks must have identical
        node and edge sets.
        """
        with open(pkg_file(__name__, 'data/genelist2.tsv'), 'r') as fh:
            input_lines = fh.read()
        submit_url = urls.reverse('GOnet-submit-form')

        req = dict(job_req, **{'paste_data': input_lines})
        c.post(submit_url, req, follow=True)
        enrich_job = GOnetSubmission.objects.latest('submit_time')

        df = enrich_job.enrich_res_df
        enriched_terms = df[df['q'] < enrich_job.qvalue]['term']
        custom_annotation = '\n'.join(enriched_terms)
        req = dict(
            job_req, **{
                'paste_data': input_lines,
                'analysis_type': 'annot',
                'slim': 'custom',
                'custom_terms': custom_annotation
            })
        c.post(submit_url, req, follow=True)
        annot_job = GOnetSubmission.objects.latest('submit_time')

        G_enrich = cyjs.cyjs2nx(json.loads(enrich_job.network))
        G_annot = cyjs.cyjs2nx(json.loads(annot_job.network))

        self.assertSetEqual(set(G_enrich.nodes), set(G_annot.nodes))
        self.assertSetEqual(set(G_enrich.edges), set(G_annot.edges))
Exemple #17
0
    def test_submission_default(self):
        """Spam the submit form with 200 threaded enrichment requests.

        Threads are started with a random pause of up to ten seconds
        between launches and are joined afterwards.  The final
        ``sleep(100000000)`` means the test does not return on its own.
        """
        with open(pkg_file(__name__, 'data/tests/genelist6.tsv'),
                  'r') as fh:
            input_lines = fh.read()
        request_data = {
            'submit': ['Submit'],
            'paste_data': [input_lines],
            'namespace': ['biological_process'],
            'analysis_type': ['enrich'],
            'output_type': ['graph'],
            'csv_separator': ['\t'],
            'qvalue': [0.05]
        }
        submit_url = urls.reverse('GOnet-submit-form')
        threads = []
        for _ in range(200):
            t = Thread(target=c.post, args=(submit_url, request_data))
            t.daemon = True
            t.start()
            threads.append(t)
            sleep(random.random() * 10)
            print('>!< Spamming task', len(threads))
        for t in threads:
            t.join()

        del threads
        print('sleeping......................................')
        sleep(100000000)
 def test_cmp_vrt_iter_same(self):
     """Comparing example2 with itself returns every one of its variants."""
     source = pkg_file('genomvar.test', 'data/example2.vcf.gz')
     vs = VariantSetFromFile(source)
     everything = list(vs.find_vrt())
     shared_with_self = list(vs.comm_vrt(vs).iter_vrt())
     self.assertEqual(len(shared_with_self), len(everything))
Exemple #19
0
    def test_class(self):
        """Exercise ``find_vrt`` on a file-backed set with INFO parsing.

        Covers INFO values of found variants, a multiallelic locus, narrow
        position queries, and an unrestricted query issued twice.
        """
        vset = VariantSetFromFile(
            pkg_file('genomvar.test', 'data/example1.vcf.gz'),
            parse_info=True,
            reference=self.chr24,
            parse_samples='SAMP1')

        # INFO values of the two variants found in 1200-1210
        found = list(vset.find_vrt('chr24', 1200, 1210))
        self.assertEqual(len(found), 2)
        first, second = found
        self.assertEqual(first.attrib['info']['NSV'], 1)
        self.assertEqual(second.attrib['info']['RECN'], 19)

        # Multiallelic locus: both alleles report AF of 0.5
        found = list(vset.find_vrt('chr24', 20, 30))
        self.assertEqual(len(found), 2)
        first, second = found
        for allele in (first, second):
            self.assertEqual(allele.attrib['info']['AF'], 0.5)

        # Narrow windows must each hit exactly one variant
        for start, end in ((2095, 2096), (2098, 2100)):
            hits = list(vset.find_vrt('chr24', start, end))
            self.assertEqual(len(hits), 1)

        # Unrestricted query, issued twice, gives all 16 variants each time
        for _ in range(2):
            self.assertEqual(len(list(vset.find_vrt())), 16)
Exemple #20
0
    def test_from_variants_vcf(self):
        """Write a set built from variants to VCF and read it back.

        Done once with ``info_spec`` given as a list of tuples and once
        with the ``dtype['info']`` of the source set.  The output must
        contain the expected ``DP4`` INFO header line, and the re-read
        variants must be edit-equal with matching ``NSV`` values.
        """
        vs0 = varset.VariantSet.from_vcf(pkg_file('genomvar.test',
                                                  'data/example1.vcf'),
                                         parse_info=True)
        variants1 = sorted(vs0.iter_vrt(), key=lambda v: v.key)
        vs = VariantSet.from_variants(variants1)
        _desc = 'Test for multinumber field'
        expected_header = \
            '##INFO=<ID=DP4,Number=4,Type=Integer,Description="{}">'\
            .format(_desc)
        info_spec_tuples = [('DP4', 4, 'Integer', _desc),
                            ('NSV', 1, 'Integer')]
        info_spec_dict = vs0.dtype['info']
        for info_spec in (info_spec_tuples, info_spec_dict):
            # The context manager closes and removes the temporary file
            # even when an assertion inside fails.
            with tempfile.NamedTemporaryFile(suffix='.vcf') as tf:
                with open(tf.name, 'wt') as fh:
                    vs.to_vcf(fh, info_spec=info_spec)

                with open(tf.name, 'rt') as fh:
                    self.assertIn(expected_header, fh.read().splitlines())

                variants2 = sorted(
                    VCFReader(tf.name).iter_vrt(parse_info=True),
                    key=lambda v: v.key)
            self.assertEqual(len(variants1), len(variants2))
            for v1, v2 in zip(variants1, variants2):
                self.assertTrue(v1.edit_equal(v2))
                self.assertEqual(v1.attrib['info']['NSV'],
                                 v2.attrib['info']['NSV'])
Exemple #21
0
 def test_GO_enrich_genelist6_long(self):
     """Enrichment of genelist6 against a custom background file.

     The request uses ``qvalue`` 0.0001 and must come back with HTTP 200.
     """
     with open(pkg_file(__name__, 'data/genelist6.tsv'), 'r') as fh:
         input_lines = fh.read()
     bg_path = pkg_file(__name__,
                        'data/DPOS_Mgate_Tcells_background_TPM1.lst')
     # The background file only needs to stay open for the request.
     with open(bg_path, 'r') as bg_file:
         req = dict(
             job_req, **{
                 'paste_data': input_lines,
                 'bg_type': 'custom',
                 'bg_file': bg_file,
                 'qvalue': 0.0001
             })
         resp = c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
     # Result unused; kept because the lookup itself is part of the test
     # (presumably it raises when no submission was stored - verify).
     GOnetSubmission.objects.latest('submit_time')
     self.assertEqual(resp.status_code, 200)
Exemple #22
0
 def test_resolution_mouse_Uniprot_IDs(self):
     """Submitted mouse Uniprot IDs map to the MGI IDs listed in genelist9.

     Rows whose ``Uniprot_ID`` is the string ``'None'`` are dropped before
     submission, and ``MGI:2151253`` is excluded from the comparison.
     """
     genes = pd.read_csv(pkg_file(__name__, 'data/genelist9.tsv'),
                         sep='\t')
     genes = genes[genes.Uniprot_ID != 'None']
     req = dict(job_req,
                paste_data='\n'.join(genes['Uniprot_ID']),
                analysis_type='annot',
                slim='goslim_immunol',
                output_type='csv',
                organism='mouse')
     resp = c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
     self.assertEqual(resp.status_code, 200)

     sn = GOnetSubmission.objects.latest('submit_time')
     idmap_url = urls.reverse('GOnet-input-idmap', args=(str(sn.id), ))
     idmap_text = c.get(idmap_url).content.decode()
     res = pd.read_csv(io.StringIO(idmap_text), sep='\t', index_col=0)

     for row in genes.itertuples():
         if row.MGI_ID != 'MGI:2151253':
             self.assertEqual(res.loc[row.Uniprot_ID, 'MGI_ID'],
                              row.MGI_ID)
Exemple #23
0
    def test_from_variants_to_vcf_with_sampdata(self):
        """Write sample data (GT and AD) to VCF and read it back.

        The output must contain the expected ``AD`` FORMAT header line,
        and the re-read variants must be edit-equal to the originals with
        identical ``AD`` values for sample ``SAMP1``.
        """
        source = pkg_file('genomvar.test', 'data/example3.vcf')
        variants1 = sorted(VCFReader(source).iter_vrt(parse_samples=True),
                           key=lambda v: v.key)
        vs = VariantSet.from_variants(variants1)

        # The context manager closes and removes the temporary file even
        # when an assertion inside fails.
        with tempfile.NamedTemporaryFile(suffix='.vcf') as tf:
            with open(tf.name, 'wt') as fh:
                vs.to_vcf(
                    fh,
                    format_spec=[RESERVED_FORMAT.GT,
                                 ('AD', 'R', 'Integer', '')],
                    samples=['SAMP1'])

            with open(tf.name, 'rt') as fh:
                self.assertIn(
                    '##FORMAT=<ID=AD,Number=R,Type=Integer,'
                    'Description="">',
                    fh.read().splitlines())
            variants2 = sorted(
                VCFReader(tf.name).iter_vrt(parse_samples=True),
                key=lambda v: v.key)

        self.assertEqual(len(variants1), len(variants2))
        for v1, v2 in zip(variants1, variants2):
            self.assertTrue(v1.edit_equal(v2))
            self.assertEqual(v1.attrib['samples']['SAMP1']['AD'],
                             v2.attrib['samples']['SAMP1']['AD'])
Exemple #24
0
    def test_from_vcf_with_attr(self):
        """INFO attributes parsed by ``VariantSet.from_vcf`` on example1.

        Covers a single variant, a multiallelic locus, a lookup of an
        unknown INFO field and a field expected to be ``True``.
        """
        def non_null(start, end):
            """Return chr24 variants in the window that are not Null."""
            return [vrt for vrt in vset.find_vrt('chr24', start, end)
                    if not vrt.is_variant_instance(variant.Null)]

        vset = VariantSet.from_vcf(
            pkg_file('genomvar.test', 'data/example1.vcf'),
            parse_info=True)

        found = list(vset.find_vrt('chr24', 150, 160))
        self.assertEqual(len(found), 1)
        self.assertEqual(found[0].attrib['info']['AF'], 1.0)

        # Check multiallelic locus
        found = list(vset.find_vrt('chr24', 20, 30))
        self.assertEqual(len(found), 2)
        for vrt in found:
            if vrt.is_variant_instance(variant.Null):
                continue
            self.assertEqual(vrt.attrib['info']['AF'], 0.5)

        # Check None/KeyError cases (".",field absent...)
        found = non_null(450, 460)
        self.assertEqual(len(found), 1)
        with self.assertRaises(ValueError):
            found[0].attrib['info']['Randomfields']

        found = non_null(4750, 4760)
        self.assertEqual(len(found), 1)
        self.assertEqual(found[0].attrib['info']['STR'], True)
Exemple #25
0
    def test_from_vcf_to_records(self):
        """Field layout of ``to_records`` in nested and flat form.

        The set is read from example1 with INFO and sample parsing on;
        its only sample must be ``SAMP1``.
        """
        base_fields = ['chrom', 'start', 'end', 'ref', 'alt', 'vartype',
                       'phase_group']
        info_fields = ['NSV', 'AF', 'DP4', 'ECNT', 'pl', 'mt', 'RECN', 'STR']

        source = pkg_file('genomvar.test', 'data/example1.vcf')
        vs = VariantSet.from_vcf(source, parse_info=True, parse_samples=True)

        self.assertEqual(vs._samples, ['SAMP1'])

        # Nested dtype: INFO and samples live in sub-records
        nested = vs.to_records(nested=True)
        self.assertEqual(list(nested.dtype.fields),
                         base_fields + ['info', 'SAMPLES'])
        self.assertEqual(list(nested['info'].dtype.fields), info_fields)
        self.assertEqual(list(nested['SAMPLES'].dtype.fields), ['SAMP1'])
        self.assertEqual(list(nested['SAMPLES']['SAMP1'].dtype.fields),
                         ['GT'])

        # Flat dtype: sub-record names become prefixes
        flat = vs.to_records(nested=False)
        flat_info = ['info_' + name for name in info_fields]
        self.assertEqual(list(flat.dtype.fields),
                         base_fields + flat_info + ['SAMPLES_SAMP1_GT'])
Exemple #26
0
 def test_GO_annotate_invalid_term(self):
     """An unknown custom term is reported back in the response.

     ``GO:1234567`` is appended to the custom annotation terms; the
     response must contain both the not-found notice and that term ID.
     """
     with open(pkg_file(__name__, 'data/genelist2.tsv'), 'r') as fh:
         input_lines = fh.read()
     with open(pkg_file(__name__, 'data/custom_annotation.txt'),
               'r') as fh:
         custom_annotation = fh.read()
     custom_annotation += 'GO:1234567'
     req = dict(
         job_req, **{
             'paste_data': input_lines,
             'analysis_type': 'annot',
             'slim': 'custom',
             'custom_terms': custom_annotation
         })
     resp = c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
     self.assertContains(
         resp, 'Some of the custom terms provided were not found')
     self.assertContains(resp, 'GO:1234567')
Exemple #27
0
 def test_sv_types(self):
     """Reading example4 records warnings and yields 100 unit variants.

     More than one warning must be recorded, and the last recorded
     warning message must contain ``'Structural'``.
     """
     with warnings.catch_warnings(record=True) as wrn:
         # Install the filter before running the code under test: with
         # the filter set afterwards, warnings already raised from the
         # same location earlier in the process may be suppressed and
         # never reach ``wrn``.
         warnings.simplefilter('always')
         vs = VariantSet.from_vcf(
             pkg_file('genomvar.test', 'data/example4.vcf.gz'))
         self.assertEqual(vs.nof_unit_vrt(), 100)
         self.assertGreater(len(wrn), 1)
         self.assertIn('Structural', str(wrn[-1].message))
Exemple #28
0
    def test_sort_chroms(self):
        """``sort_chroms`` orders chromosomes by default and by ``key``."""
        source = pkg_file('genomvar.test', 'data/example2.vcf.gz')
        vs = VariantSet.from_vcf(source)

        vs.sort_chroms()
        default_order = list(vs.get_chroms())
        self.assertEqual(default_order, ['chr23', 'chr24'])

        def chr24_first(chrom):
            """Rank ``chr24`` ahead of every other chromosome."""
            return 1 if chrom == 'chr24' else 2

        vs.sort_chroms(key=chr24_first)
        custom_order = list(vs.get_chroms())
        self.assertEqual(custom_order, ['chr24', 'chr23'])
Exemple #29
0
    def test_GO_annotate_genelist2(self):
        """Annotate genelist2 with ``goslim_immunol``; check all outputs.

        Verifies an expected term-to-gene edge in the stored network, the
        sign of the user-supplied contrast values on gene nodes, and the
        presence of ``GO:0007165`` in the CSV and TXT result views.
        """
        genelist_path = pkg_file(__name__, 'data/genelist2.tsv')
        with open(genelist_path, 'r') as fh:
            input_lines = fh.read()
        input_data_df = pd.read_csv(genelist_path, sep='\t', header=None)
        req = dict(
            job_req, **{
                'paste_data': input_lines,
                'analysis_type': 'annot',
                'slim': 'goslim_immunol'
            })
        resp = c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
        self.assertEqual(resp.status_code, 200)
        sn = GOnetSubmission.objects.latest('submit_time')
        net = json.loads(sn.network)
        G = cyjs.cyjs2nx(net)
        self.assertTrue(G.has_edge('GO:0007165', 'P29376'))

        # Test recognition of user-supplied contrast values
        gene_nodes = [node for node in net['elements']['nodes']
                      if not node['data']['name'].startswith('GO:')]
        contrasts = [float(node['data']['expr:user_supplied'])
                     for node in gene_nodes]
        nof_positive = len([value for value in contrasts if value > 0])
        nof_negative = len([value for value in contrasts if value < 0])
        # -1 for HIST1H2AM
        self.assertEqual(nof_positive, np.sum(input_data_df[1] > 0) - 1)
        self.assertEqual(nof_negative, np.sum(input_data_df[1] < 0))

        # Test CSV response
        csv_resp = c.get(urls.reverse('GOnet-csv-res', args=(str(sn.id), )))
        res_df = pd.read_csv(io.StringIO(csv_resp.content.decode()),
                             sep=',', index_col=0)
        self.assertIn('GO:0007165', set(res_df['GO_term_ID']))
        self.assertEqual(res_df.index[0], 1)

        # Test TXT response: the first token of every line is collected
        txt_resp = c.get(urls.reverse('GOnet-txt-res', args=(str(sn.id), )))
        goterms = set()
        for line in io.StringIO(txt_resp.content.decode()):
            goterms.add(line.split()[0])
        self.assertIn('GO:0007165', goterms)
Exemple #30
0
 def test_empty_vcf(self):
     """A VCF holding only the leading header lines has zero variants.

     The input is the leading run of ``#`` lines of example1, passed to
     ``VariantSet.from_vcf`` as an in-memory text buffer.
     """
     with open(pkg_file('genomvar.test', 'data/example1.vcf')) as fh:
         header_lines = itertools.takewhile(
             lambda text: text.startswith('#'), fh)
         header_only = ''.join(header_lines)
     vs = VariantSet.from_vcf(io.StringIO(header_only))
     self.assertEqual(vs.nof_unit_vrt(), 0)