Beispiel #1
0
    def test_json_parser(self):
        """Run the 'json' parser on ``self.test_json`` and check the result.

        An annotation override for task 'T1' is registered first through
        ``self.override_annotation`` (its exact semantics are defined outside
        this test). The parsed record must contain exactly the expected
        fields with the expected values.
        """
        import pprint

        self.override_annotation('T1', None, ['Real', 'yes', 1],
                                 ['Bogus', 'no', 0])

        # The record the parser is expected to produce for the fixture.
        expected = {
            'classification_id': 60910323,
            'user_id': 1437100,
            'workflow': 1737,
            'time_stamp': datetime.datetime(2017, 6, 22, 11, 29, 50, 609000),
            'session_id':
            '53244efffa0eddf43255b5f37b6b462242f39141f3373215d45054da4b1d9066',
            'live_project': True,
            'seen_before': False,
            'annotation': 1,
            'subject_id': 1053214149494,
        }

        json_parser = parsers.ClassificationParser('json')
        pprint.pprint(self.test_json)
        parsed = json_parser.process(self.test_json)

        print(parsed)
        for field in expected:
            wanted = expected[field]
            print(field, wanted)
            assert parsed[field] == wanted

        # No extra fields beyond the expected ones.
        assert len(parsed) == len(expected)
Beispiel #2
0
    def upload_project_dump(self, fname):
        """Reload the collection from a csv classification dump.

        Calls ``self._rebuild()``, parses every row of the csv file with a
        'csv' ``ClassificationParser``, inserts the parsed classifications
        into ``self.collection`` in batches, and finally calls
        ``self._gen_stats()``. Rows for which the parser returns ``None``
        are skipped.

        Args:
            fname: Path of the csv dump file to read.
        """
        logger.info('dropping collection')
        self._rebuild()

        logger.info('parsing csv dump')
        data = []
        pp = parsers.ClassificationParser('csv')

        # The csv module asks for files opened with newline='' so that
        # newlines embedded in quoted fields are interpreted correctly.
        with open(fname, 'r', newline='') as file:
            reader = csv.DictReader(file)

            # Count rows from 1 so the progress line reports how many rows
            # have been read rather than the zero-based index.
            for count, row in enumerate(reader, 1):
                cl = pp.process(row)
                if cl is None:
                    continue
                data.append(cl)

                # Write first and flush afterwards so the progress line is
                # actually pushed out in this iteration.
                sys.stdout.write("%d records processed\r" % count)
                sys.stdout.flush()

                # Flush a full batch to keep memory bounded.
                if len(data) > 100000:
                    self.collection.insert_many(data)
                    data = []

        logger.critical('Parsers skipped %d rows', pp.skipped)
        # Only insert the remainder when there is one: the dump may be empty,
        # fully skipped, or end exactly on a batch boundary, and pymongo's
        # insert_many rejects an empty document list (assuming
        # self.collection is a pymongo Collection -- TODO confirm).
        if data:
            self.collection.insert_many(data)
        self._gen_stats()
        logger.debug('done')
Beispiel #3
0
    def test_csv_parser_muon_complex(self):
        """Run the 'csv' parser on ``self.test_csv[1]`` and check the result.

        An annotation override for task 'T0' with the value path
        '0.details.0.value.0' is registered first through
        ``self.override_annotation`` (semantics defined outside this test).
        The parser receives a copy of the fixture row.
        """
        import pprint

        self.override_annotation('T0', '0.details.0.value.0', [3], [-1])

        # The record the parser is expected to produce for the fixture row.
        expected = {
            'classification_id': 15935073,
            'user_id': 1460166,
            'workflow': 2473,
            'time_stamp': datetime.datetime(2016, 8, 22, 16, 10, 26),
            'session_id':
            '6049552d100cae2a9570c40bee1119cfbca4decae1e50bafffc3cba873793d6f',
            'live_project': False,
            'seen_before': False,
            'annotation': 1,
            'subject_id': 3354054,
        }

        csv_parser = parsers.ClassificationParser('csv')
        fixture_row = self.test_csv[1]
        pprint.pprint(fixture_row)
        parsed = csv_parser.process(fixture_row.copy())

        print(parsed)
        for field in expected:
            wanted = expected[field]
            print(field, wanted)
            assert parsed[field] == wanted

        # No extra fields beyond the expected ones.
        assert len(parsed) == len(expected)
Beispiel #4
0
    def test_csv_parser_elephant(self):
        """Run the 'csv' parser on ``self.test_csv[0]`` and check the result.

        An annotation override for task 'T1' is registered first through
        ``self.override_annotation`` (semantics defined outside this test).
        The parser receives a copy of the fixture row.
        """
        import pprint

        self.override_annotation('T1', None, ['2'], ['-1'])

        # The record the parser is expected to produce for the fixture row.
        expected = {
            'classification_id': 25656085,
            'user_id': 1559523,
            'workflow': 3304,
            'time_stamp': datetime.datetime(2017, 1, 24, 17, 0, 38),
            'session_id':
            '3eb59fd166f9ace809fd1b611a52b14152e1a86f998625f7cf88f14627fa318d',
            'live_project': False,
            'seen_before': False,
            'annotation': 1,
            'subject_id': 458040,
        }

        csv_parser = parsers.ClassificationParser('csv')
        fixture_row = self.test_csv[0]
        pprint.pprint(fixture_row)
        parsed = csv_parser.process(fixture_row.copy())

        print(parsed)
        for field in expected:
            wanted = expected[field]
            print(field, wanted)
            assert parsed[field] == wanted

        # No extra fields beyond the expected ones.
        assert len(parsed) == len(expected)
Beispiel #5
0
    def test_csv_parser_seen_before(self):
        """Run the 'csv' parser on a copy of ``self.test_csv[3]``.

        An annotation override for task 'T1' is registered first through
        ``self.override_annotation`` (semantics defined outside this test).
        The expected record has ``seen_before`` set to ``True``.
        """
        import pprint

        self.override_annotation('T1', None, ['Real', 'yes', 1],
                                 ['Bogus', 'no', 0])

        # The record the parser is expected to produce for the fixture row.
        expected = {
            'classification_id': 11423065,
            'user_id': 1437100,
            'workflow': 1737,
            'time_stamp': datetime.datetime(2016, 4, 18, 18, 50, 48),
            'session_id':
            'b759eab8f4fe3436707edede3094cd5bd30c4812e2a701e48fa7cb13f0068f40',
            'live_project': False,
            'seen_before': True,
            'annotation': 1,
            'subject_id': 1935795,
        }

        csv_parser = parsers.ClassificationParser('csv')
        row_copy = self.test_csv[3].copy()
        pprint.pprint(row_copy)
        parsed = csv_parser.process(row_copy)

        print(parsed)
        for field in expected:
            wanted = expected[field]
            print(field, wanted)
            assert parsed[field] == wanted

        # No extra fields beyond the expected ones.
        assert len(parsed) == len(expected)
Beispiel #6
0
    def test_type_mod_timestamp(self):
        """Check ``_type`` with the 'timestamp' type on two string formats."""
        parser = parsers.ClassificationParser(None)

        # (raw string, datetime it must convert to)
        cases = (
            ('2017-01-24T16:11:24.680Z',
             datetime.datetime(2017, 1, 24, 16, 11, 24, 680000)),
            ('2017-01-24 16:11:24 UTC',
             datetime.datetime(2017, 1, 24, 16, 11, 24)),
        )
        for raw, expected in cases:
            converted = parser._type(raw, 'timestamp')
            assert converted == expected
Beispiel #7
0
    def test_remap(self):
        """Check ``_remap`` for key 'a' with a list-valued 'remap' entry.

        The value must be returned whether it is stored under 'a' itself or
        under any of the alternative keys listed in ``field['remap']``.
        """
        field = {'type': int, 'remap': ['b', 'c', 'd', 'e']}
        parser = parsers.ClassificationParser(None)

        # Each classification holds a single key: 'a' or one of its aliases.
        cases = (('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', 5))
        for source_key, expected in cases:
            result = parser._remap({source_key: expected}, 'a', field)
            assert result == expected
Beispiel #8
0
    def test_remap_notsource(self):
        """Check ``_remap`` with a dict-valued 'remap' and a ``None`` parser.

        The field maps 'json' to 'b', but the parser is built with ``None``;
        the value stored under the requested key 'a' is expected back.
        """
        parser = parsers.ClassificationParser(None)
        field_spec = {'type': int, 'remap': {'json': 'b'}}
        classification = {'a': 1, 'b': 2}

        result = parser._remap(classification, 'a', field_spec)
        assert result == 1
Beispiel #9
0
    def test_type_mod_float(self):
        """Check that ``_type`` converts the string '0.1' to the float 0.1."""
        parser = parsers.ClassificationParser(None)

        converted = parser._type('0.1', float)
        assert converted == 0.1
Beispiel #10
0
    def test_type_mod_bool(self):
        """Check that ``_type`` with ``bool`` yields ``True`` for truthy forms.

        Covers the strings 'true' and 'True' as well as the boolean ``True``.
        """
        parser = parsers.ClassificationParser(None)

        for raw in ('true', True, 'True'):
            converted = parser._type(raw, bool)
            assert converted is True
Beispiel #11
0
    def test_type_mod_int(self):
        """Check that ``_type`` with ``int`` returns 1 for '1' and for 1."""
        parser = parsers.ClassificationParser(None)

        for raw in ('1', 1):
            converted = parser._type(raw, int)
            assert converted == 1