Beispiel #1
0
    def test_1news_minfinUA(self, requests_fun):
        """Parse minfin.com.ua headlines and insert them into ``news`` twice.

        The parser output must equal the ``news_minfin_data`` fixture.  The
        parser is then run through ``parent`` two times: the first run is
        expected to return ``(n, 0)`` and the repeat ``(0, n)``, where ``n``
        is the fixture length; ``news`` must hold ``n`` documents after each.

        ``requests_fun`` is not referenced in the body (presumably a mock
        supplied by a patch decorator -- confirm at the definition site).
        """
        self.assertEqual(parse_minfin_headlines(), self.news_minfin_data,
                         'wrong data after parser')

        def insert_headlines():
            # one full spider run against the 'news' collection
            return parent([(news_minfin.parse_minfin_headlines, )], 'news')

        def check_collection():
            # collection size and one known document must match the fixture
            self.assertEqual(news.find({}).count(),
                             len(self.news_minfin_data),
                             'wrong news number in DB')
            known_doc = news.find_one(
                {'href': 'http://minfin.com.ua/2017/03/17/26771826/'})
            self.assertEqual(known_doc['time'],
                             datetime(2017, 3, 17, 17, 53),
                             'UTC time for doc error')

        first_run = insert_headlines()
        self.assertEqual(first_run, (len(self.news_minfin_data), 0),
                         'parse_minfin_headlines wrong return')
        check_collection()

        # duplicate insert
        second_run = insert_headlines()
        self.assertEqual(second_run, (0, len(self.news_minfin_data)),
                         'parse_minfin_headlines wrong return; error '
                         'in duplicate insert')
        check_collection()
Beispiel #2
0
    def test_2minfin_headlines(self, requests_fun):
        """Check minfin.gov.ua headline parsing and two inserts into ``news``.

        Each section URL must parse to its fixture and ``minfin_headlines()``
        must equal ``minfin_headlines_all``.  The spider is then run through
        ``parent`` twice: the first run is expected to return ``(n, 0)`` and
        the repeat ``(0, n)``, with ``n = len(self.minfin_headlines_all)``.

        ``requests_fun`` is not referenced in the body (presumably a mock
        supplied by a patch decorator -- confirm at the definition site).
        """
        section_fixtures = (
            ('http://www.minfin.gov.ua/news/novini-ta-media',
             self.minfin_headlines_novini_ta_media_data),
            ('http://www.minfin.gov.ua/news/borg/zovnishni-suverenni-zobovjazannja',
             self.minfin_headlines_ZSZ_data),
        )
        for url, expected_headlines in section_fixtures:
            self.assertEqual(expected_headlines, minfin_headlines_url(url),
                             'error in parse_minfin_headlines')
        self.assertEqual(self.minfin_headlines_all, minfin_headlines(),
                         'error in parse_minfin_headlines')

        def check_collection():
            # collection size and one known document must match the fixture
            self.assertEqual(news.find({}).count(),
                             len(self.minfin_headlines_all),
                             'wrong news number in DB')
            known_doc = news.find_one({
                'href': 'http://www.minfin.gov.ua/news/view/ministerstvo-finansiv-ukrainy-zdiisnylo-vyplatu-kuponnoho-dokhodu-za-oblihatsiiamy-zovnishnoi-derzhavnoi-pozyky?category=borg&subcategory=zovnishni-suverenni-zobovjazannja'
            })
            self.assertEqual(known_doc['time'], datetime(2017, 2, 28, 22, 0),
                             'UTC time for doc error')

        # mongo tests
        first_run = parent([(minfin.minfin_headlines, )], 'news')
        section_total = (len(self.minfin_headlines_novini_ta_media_data)
                         + len(self.minfin_headlines_ZSZ_data))
        self.assertEqual(len(self.minfin_headlines_all), section_total,
                         'simple check; sum of first two list should be eq. to third')
        self.assertEqual(first_run, (len(self.minfin_headlines_all), 0),
                         'minfin_headlines_all return')
        check_collection()

        # mongo duplicate test
        second_run = parent([(minfin.minfin_headlines, )], 'news')
        self.assertEqual(second_run, (0, len(self.minfin_headlines_all)),
                         'minfin_headlines_all wrong return')
        check_collection()
Beispiel #3
0
    def test_mongo_data(self):
        """Insert the combined spider fixtures three times and check MongoDB.

        * Pass 1 is expected to return ``(len(self.data_all) - 6 + 1, 0)``;
          ``records`` must hold that many documents and ``data_active`` must
          hold ``len(self.data_all)``.
        * Pass 2 repeats the same fixtures and is expected to return
          ``(0, 0)`` with both collection sizes unchanged.
        * Pass 3 uses the ``*_del`` fixtures and is expected to return
          ``(0, 8)``.

        For each bid in ``self.test_bid`` the stored ``time`` must not change
        between passes, while ``time_update`` must increase.
        """
        result = parent([(berlox.data_api_berlox, self.fetch_data_test_berlox),
                         (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
                         (parse_minfin.data_api_minfin, self.get_triple_data_test)], 'records')
        print(result)
        print('self.data_all= ', len(self.data_all))
        first_insert_time = datetime.now(tz=local_tz)

        total = len(self.data_all)
        expected_records = total - 6 + 1
        self.assertEqual(result, (expected_records, 0),
                         '6 - docs with cookies from minfin in "record"; '
                         'in "data_active" present only 1 doc with cookies')
        # TODO: could be a problem. Not all cookie docs are inserted into
        # "records", while "data_active" receives all of them.
        self.assertEqual(result[0], records.find({}).count(), 'should present 1 doc with cookies')
        self.assertEqual(records.find({}).count(), expected_records)
        self.assertEqual(data_active.find({}).count(), total,
                         'should be {} in "data_active"'.format(total))
        self.assertEqual(data_active.find({}).count(), records.find({}).count() + 6 - 1)

        test_record_times = {}
        test_record_first_update_time = {}
        for spider, bid in self.test_bid.items():
            stored = data_active.find_one({'bid': bid})
            # remember the raw stored values before the date adjustment below
            test_record_times[spider] = stored['time']
            test_record_first_update_time[spider] = stored['time_update']
            # fix the date if time is greater than the current time
            # TODO: needs checking whether the same change is needed in the code
            doc = date_depends_time(stored)
            self.assertLess(doc['time'], doc['time_update'])
        sleep(1)    # make sure the next update gets a later time_update

        # second insert of same data
        result = parent([(berlox.data_api_berlox, self.fetch_data_test_berlox),
                         (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
                         (parse_minfin.data_api_minfin, self.get_triple_data_test)], 'records')
        self.assertEqual(result, (0, 0))
        for spider, bid in self.test_bid.items():
            stored = data_active.find_one({'bid': bid})
            self.assertEqual(stored['time'], test_record_times[spider],
                             'after UPDATE "time" should be same')
            self.assertGreater(stored['time_update'], test_record_first_update_time[spider],
                               'time_update should be changed in UPDATE ')

        self.assertEqual(data_active.find({}).count(), total,
                         'should be {} in "data_active"'.format(total))
        self.assertEqual(records.find({}).count(), expected_records)
        self.assertLess(first_insert_time, data_active.find_one()['time_update'],
                        'time_update is not newer')

        # third insert of data with deleted records
        result = parent([(berlox.data_api_berlox, self.fetch_data_test_berlox_del),
                         (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA_del),
                         (parse_minfin.data_api_minfin, self.get_triple_data_test_del)], 'records')
        print(result)
        self.assertEqual(result, (0, 8))
        self.assertNotEqual(data_active.find({}).count(), records.find({}).count(),
                            'after insert "records" and "data_active" '
                            'should not be equal')
        for spider, bid in self.test_bid.items():
            stored = data_active.find_one({'bid': bid})
            self.assertEqual(stored['time'], test_record_times[spider],
                             'after UPDATE "time" should be same')
            self.assertGreater(stored['time_update'], test_record_first_update_time[spider],
                               'time_update should be changed in UPDATE')
Beispiel #4
0
    def test_2minfin_headlines(self, requests_fun):
        """Verify minfin.gov.ua headline parsers and repeated DB insertion.

        Both section pages and the combined ``minfin_headlines()`` output are
        compared with their fixtures.  ``parent`` is then called twice for
        the ``'news'`` collection; the expected returns are ``(n, 0)`` and
        then ``(0, n)`` with ``n = len(self.minfin_headlines_all)``.

        ``requests_fun`` is unused in the body (presumably an injected mock
        -- confirm against the decorator that supplies it).
        """
        parse_failure = 'error in parse_minfin_headlines'

        news_page = 'http://www.minfin.gov.ua/news/novini-ta-media'
        self.assertEqual(self.minfin_headlines_novini_ta_media_data,
                         minfin_headlines_url(news_page), parse_failure)
        debt_page = 'http://www.minfin.gov.ua/news/borg/zovnishni-suverenni-zobovjazannja'
        self.assertEqual(self.minfin_headlines_ZSZ_data,
                         minfin_headlines_url(debt_page), parse_failure)
        self.assertEqual(self.minfin_headlines_all, minfin_headlines(),
                         parse_failure)

        def run_spider():
            return parent([(minfin.minfin_headlines, )], 'news')

        def verify_stored_news():
            # same size and same sample document are expected after each run
            stored_count = news.find({}).count()
            self.assertEqual(stored_count, len(self.minfin_headlines_all),
                             'wrong news number in DB')
            sample = news.find_one({
                'href':
                'http://www.minfin.gov.ua/news/view/ministerstvo-finansiv-ukrainy-zdiisnylo-vyplatu-kuponnoho-dokhodu-za-oblihatsiiamy-zovnishnoi-derzhavnoi-pozyky?category=borg&subcategory=zovnishni-suverenni-zobovjazannja'
            })
            self.assertEqual(sample['time'], datetime(2017, 2, 28, 22, 0),
                             'UTC time for doc error')

        # mongo tests
        inserted = run_spider()
        parts_total = len(self.minfin_headlines_novini_ta_media_data) + \
            len(self.minfin_headlines_ZSZ_data)
        self.assertEqual(
            len(self.minfin_headlines_all), parts_total,
            'simple check; sum of first two list should be eq. to third')
        self.assertEqual(inserted, (len(self.minfin_headlines_all), 0),
                         'minfin_headlines_all return')
        verify_stored_news()

        # mongo duplicate test
        repeated = run_spider()
        self.assertEqual(repeated, (0, len(self.minfin_headlines_all)),
                         'minfin_headlines_all wrong return')
        verify_stored_news()
Beispiel #5
0
    def test_1news_minfinUA(self, requests_fun):
        """Verify the minfin.com.ua headline parser and repeated DB insertion.

        After comparing the parser output with ``news_minfin_data``, the
        spider is run through ``parent`` twice.  The expected returns are
        ``(n, 0)`` and then ``(0, n)`` (``n`` = fixture length); after each
        run ``news`` must contain ``n`` documents and the sample document
        must carry the expected ``time``.

        ``requests_fun`` is unused in the body (presumably an injected mock
        -- confirm against the decorator that supplies it).
        """
        self.assertEqual(parse_minfin_headlines(), self.news_minfin_data, 'wrong data after parser')

        headline_total = len(self.news_minfin_data)
        # (expected parent() return, failure message) for each consecutive run;
        # the second entry is the duplicate insert
        runs = (
            ((headline_total, 0), 'parse_minfin_headlines wrong return'),
            ((0, headline_total), 'parse_minfin_headlines wrong return; error in duplicate insert'),
        )
        for expected_return, failure_note in runs:
            outcome = parent([(news_minfin.parse_minfin_headlines, )], 'news')
            self.assertEqual(outcome, expected_return, failure_note)
            self.assertEqual(news.find({}).count(), headline_total, 'wrong news number in DB')
            sample = news.find_one({'href': 'http://minfin.com.ua/2017/03/17/26771826/'})
            self.assertEqual(sample['time'], datetime(2017, 3, 17, 17, 53), 'UTC time for doc error')
    def test_insert(self):
        """Insert the minfin fixture three times and check both collections.

        * Pass 1 is expected to return ``(308, 0)`` and leave 308 documents
          in both ``records`` and ``data_active`` (6 of them with
          ``session: True`` in each collection).
        * Pass 2 repeats the same data: expected return ``(308, 0)``,
          ``data_active`` stays at 308 while ``records`` grows to 616.
        * Pass 3 uses the ``_del`` fixture: expected return ``(302, 6)`` and
          302 documents in ``data_active``.

        The document with bid ``'32166583'`` must keep its ``time`` across
        passes while its ``time_update`` increases.
        """
        etalon_total = 308      # documents in the full fixture
        reduced_total = 302     # documents expected after the "_del" fixture
        session_docs = 6        # documents flagged with session=True
        tracked_bid = '32166583'

        result = parent([(data_api_minfin, self.get_triple_data_test),], 'records')
        print(result)
        first_insert_time = datetime.now(tz=local_tz)
        assert result == (etalon_total, 0), 'wrong parent return, etalon is 308 records'
        assert result[0] == records.find({}).count(), 'function insert report wrong'
        assert records.find({}).count() == etalon_total, 'should be 308 in "records"'
        assert data_active.find({}).count() == etalon_total, 'should be 308 in "data_active"'
        assert data_active.find({}).count() == records.find({}).count(), \
            'after first insert "records" and "data_active" should be equal'
        assert records.find({'session': True}).count() == session_docs, \
            'should be 6 cookies in "records"'
        assert data_active.find({'session': True}).count() == session_docs, \
            'should be 6 cookies in "data_active"'
        # one query is enough: nothing modifies the collection between reads
        tracked = data_active.find_one({'bid': tracked_bid})
        test_record_time = tracked['time']
        test_record_first_update_time = tracked['time_update']
        assert test_record_time < test_record_first_update_time, \
            '"time" should be earlier than "time_update"'

        # second insert of same data
        result = parent([(data_api_minfin, self.get_triple_data_test), ], 'records')
        assert result == (etalon_total, 0), 'wrong parent return, etalon is 308 records'
        tracked = data_active.find_one({'bid': tracked_bid})
        assert tracked['time'] == test_record_time, 'after UPDATE "time" should be same'
        assert tracked['time_update'] > test_record_first_update_time, \
            'time_update should be changed in UPDATE'
        assert data_active.find({}).count() == etalon_total, 'should be 308 in "data_active"'
        assert records.find({}).count() == etalon_total * 2, 'should be 616 in "records"'
        assert first_insert_time < data_active.find_one()['time_update'], 'time_update is not newer'

        # third insert of data with deleted records
        result = parent([(data_api_minfin, self.get_triple_data_test_del), ], 'records')
        print(result)
        assert result == (reduced_total, session_docs), 'wrong parent return, etalon is 302 records'
        assert result[0] == data_active.find({}).count(), 'function insert report wrong'
        assert data_active.find({}).count() == reduced_total, 'should be 302 in "data_active"'
        assert data_active.find({}).count() != records.find({}).count(), \
            'after insert "records" and "data_active" should not be equal'
        assert data_active.find({'session': True}).count() == session_docs, \
            'should be 6 cookies in "data_active"'
        tracked = data_active.find_one({'bid': tracked_bid})
        assert tracked['time'] == test_record_time, 'after UPDATE "time" should be same'
        assert tracked['time_update'] > test_record_first_update_time, \
            'time_update should be changed in UPDATE'
Beispiel #7
0
    def test_3parse_announcement_ovdp(self, requests_fun):
        """Check ``announcement_ovdp`` parsing and two inserts into ``news``.

        The parsed documents get their ``time`` replaced by the etalon
        timestamp before being compared with the fixture; fixture documents
        whose ``time_auction`` date is today get the ``same_date`` headline
        suffix and flag first.  The spider is then run through ``parent``
        twice, expecting ``(n, 0)`` and then ``(0, n)``.

        ``requests_fun`` is unused in the body (presumably an injected mock
        -- confirm against the decorator that supplies it).
        """
        parsed_docs = announcement_ovdp()
        # replace today's date with the date from the etalon
        normalized_docs = [
            {**doc, 'time': kyiv_tz.localize(datetime(2017, 3, 19, 19, 32, 34, 319882))}
            for doc in parsed_docs
        ]
        expected_docs = []
        for doc in self.announcement_ovdp_data:
            if doc['time_auction'].date() == datetime.today().date():
                doc = {**doc,
                       'headline': doc['headline'] + ' in a same_date',
                       'flag': 'same_date'}
            expected_docs.append(doc)
        self.assertEqual(expected_docs, normalized_docs,
                         'error in parse_announcement_ovdp')

        def verify_stored_news(time_message):
            # collection size plus a quick look at one known announcement
            self.assertEqual(news.find({}).count(), len(self.announcement_ovdp_data),
                             'wrong news number in DB')
            sample = news.find_one(
                {'href_announce': 'http://www.minfin.gov.ua/uploads/redactor/files/48-52%20оголошення-сайт.doc'})
            if datetime.today().date() != datetime(2017, 3, 21).date():
                expected_headline = 'OVDP 21.03.2017'
            else:
                expected_headline = 'OVDP 21.03.2017 in a same_date'
            self.assertEqual(sample['headline'], expected_headline, 'quick check NAME of doc')
            self.assertEqual(sample['time_auction'], datetime(2017, 3, 21, 0, 0),
                             'time_auction should be same')
            self.assertAlmostEqual(sample['time'], current_time, delta=timedelta(seconds=1),
                                   msg=time_message)

        # mongo tests
        current_time = datetime.utcnow()
        first_run = parent([(minfin.announcement_ovdp, )], 'news')
        self.assertEqual(first_run, (len(self.announcement_ovdp_data), 0),
                         'announcement_ovdp wrong return')
        verify_stored_news('time should be same as insert data')

        # mongo check duplicate
        second_run = parent([(minfin.announcement_ovdp, )], 'news')
        self.assertEqual(second_run, (0, len(self.announcement_ovdp_data)),
                         'announcement_ovdp wrong return')
        verify_stored_news('time should be same as INSERT data')
    def test_insert(self):
        """Insert the finance.ua fixture three times and check both collections.

        * Pass 1 is expected to return ``(len(self.data), 0)`` and leave that
          many documents in both ``records`` and ``data_active``.
        * Pass 2 repeats the same data: same expected return, ``data_active``
          keeps its size while ``records`` doubles.
        * Pass 3 uses the ``_del`` fixture: expected return
          ``(len(self.data) - 1, 1)`` and one document fewer in
          ``data_active``.

        The document with bid ``960072`` must keep its ``time`` across passes
        while its ``time_update`` increases.
        """
        total = len(self.data)

        result = parent([(data_api_finance_ua, self.fetch_data_test),], 'records')
        print(result)
        first_insert_time = datetime.now(tz=local_tz)
        assert result == (total, 0), 'wrong parent return, etalon is {} records'.format(total)
        assert result[0] == records.find({}).count(), 'function insert report wrong'
        assert records.find({}).count() == total, 'should be {} in "records"'.format(total)
        assert data_active.find({}).count() == total, 'should be {} in "data_active"'.format(total)
        assert data_active.find({}).count() == records.find({}).count(), \
            'after first insert "records" and "data_active" should be equal'
        test_bid = 960072
        # one query is enough: nothing modifies the collection between reads
        tracked = data_active.find_one({'bid': test_bid})
        test_record_time = tracked['time']
        test_record_first_update_time = tracked['time_update']
        # NOTE(review): ``time < time_update`` is not asserted for this
        # spider; the check was disabled earlier -- confirm whether it
        # should be restored.

        # second insert of same data
        result = parent([(data_api_finance_ua, self.fetch_data_test),], 'records')
        assert result == (total, 0), 'wrong parent return, etalon is {} records'.format(total)
        tracked = data_active.find_one({'bid': test_bid})
        assert tracked['time'] == test_record_time, 'after UPDATE "time" should be same'
        assert tracked['time_update'] > test_record_first_update_time, \
            'time_update should be changed in UPDATE'
        assert data_active.find({}).count() == total, 'should be {} in "data_active"'.format(total)
        assert records.find({}).count() == total * 2, 'should be {} in "records"'.format(total * 2)
        assert first_insert_time < data_active.find_one()['time_update'], 'time_update is not newer'

        # third insert of data with deleted records
        result = parent([(data_api_finance_ua, self.fetch_data_test_del), ], 'records')
        print(result)
        assert result == (total - 1, 1), 'wrong parent return, etalon is {} records'.format(total - 1)
        assert result[0] == data_active.find({}).count(), 'function insert report wrong'
        assert data_active.find({}).count() == total - 1, \
            'should be {} in "data_active"'.format(total - 1)
        assert data_active.find({}).count() != records.find({}).count(), \
            'after insert "records" and "data_active" should not be equal'
        tracked = data_active.find_one({'bid': test_bid})
        assert tracked['time'] == test_record_time, 'after UPDATE "time" should be same'
        assert tracked['time_update'] > test_record_first_update_time, \
            'time_update should be changed in UPDATE'
Beispiel #9
0
    def test_mongo_data(self):
        """Run all three spiders through ``parent`` three times and check the DB.

        * Pass 1 and pass 2 use the same fixtures; each is expected to return
          ``(len(self.data_all), 0)``.  ``data_active`` must keep
          ``len(self.data_all)`` documents while ``records`` doubles after
          the second pass.
        * Pass 3 uses the ``*_del`` fixtures and is expected to return
          ``(len(self.data_all) - 8, 8)`` with that many documents left in
          ``data_active``.

        For each bid in ``self.test_bid`` the stored ``time`` must not change
        between passes, while ``time_update`` must increase.
        """
        total = len(self.data_all)

        result = parent(
            [(berlox.data_api_berlox, self.fetch_data_test_berlox),
             (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
             (parse_minfin.data_api_minfin, self.get_triple_data_test)],
            'records')
        print(result)
        first_insert_time = datetime.now(tz=local_tz)
        self.assertEqual(
            result, (total, 0),
            'wrong parent return, etalon is {} records'.format(total))
        self.assertEqual(
            result[0], records.find({}).count(),
            'function reported = {}, docs in DB= {}'.format(
                result[0], records.find({}).count()))
        self.assertEqual(records.find({}).count(), total,
                         'should be {} in "records"'.format(total))
        self.assertEqual(data_active.find({}).count(), total,
                         'should be {} in "data_active"'.format(total))
        self.assertEqual(
            data_active.find({}).count(), records.find({}).count(),
            'after first insert "records" and "data_active" should be equal')

        test_record_times = {}
        test_record_first_update_time = {}
        for spider, bid in self.test_bid.items():
            # one query is enough: nothing modifies the collection in between
            doc = data_active.find_one({'bid': bid})
            test_record_times[spider] = doc['time']
            self.assertLess(
                doc['time'], doc['time_update'],
                'bid {bid} with time {time}, but in DB update {time_update}'.format(
                    bid=bid, time=doc['time'], time_update=doc['time_update']))
            test_record_first_update_time[spider] = doc['time_update']
        sleep(1)    # make sure the next update gets a later time_update

        # second insert of same data
        result = parent(
            [(berlox.data_api_berlox, self.fetch_data_test_berlox),
             (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
             (parse_minfin.data_api_minfin, self.get_triple_data_test)],
            'records')
        self.assertEqual(
            result, (total, 0),
            'wrong parent return ={}, etalon is {} records'.format(result, total))
        for spider, bid in self.test_bid.items():
            doc = data_active.find_one({'bid': bid})
            self.assertEqual(doc['time'], test_record_times[spider],
                             'after UPDATE "time" should be same')
            self.assertGreater(doc['time_update'],
                               test_record_first_update_time[spider],
                               'time_update should be changed in UPDATE ')

        self.assertEqual(data_active.find({}).count(), total,
                         'should be {} in "data_active"'.format(total))
        self.assertEqual(records.find({}).count(), total * 2,
                         'should be {} in "records"'.format(total * 2))
        self.assertLess(first_insert_time,
                        data_active.find_one()['time_update'],
                        'time_update is not newer')

        # third insert of data with deleted records
        result = parent(
            [(berlox.data_api_berlox, self.fetch_data_test_berlox_del),
             (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA_del),
             (parse_minfin.data_api_minfin, self.get_triple_data_test_del)],
            'records')
        print(result)
        self.assertEqual(
            result, (total - 8, 8),
            'wrong parent return, etalon is {} records'.format(total - 8))
        self.assertEqual(result[0], data_active.find({}).count(),
                         'function insert report wrong')
        self.assertEqual(data_active.find({}).count(), total - 8,
                         'should be {} in "data_active"'.format(total - 8))
        self.assertNotEqual(
            data_active.find({}).count(), records.find({}).count(),
            'after insert "records" and "data_active" should not be equal')
        for spider, bid in self.test_bid.items():
            doc = data_active.find_one({'bid': bid})
            self.assertEqual(doc['time'], test_record_times[spider],
                             'after UPDATE "time" should be same')
            self.assertGreater(doc['time_update'],
                               test_record_first_update_time[spider],
                               'time_update should be changed in UPDATE')
Beispiel #10
0
    def test_mongo_data(self):
        """Insert the combined spider fixtures three times and verify MongoDB.

        * The first two passes use the same fixtures; each is expected to
          return ``(len(self.data_all), 0)``.  ``data_active`` must stay at
          ``len(self.data_all)`` documents, ``records`` must double after
          the second pass.
        * The third pass uses the ``*_del`` fixtures and is expected to
          return ``(len(self.data_all) - 8, 8)``, leaving that many
          documents in ``data_active``.

        For each bid in ``self.test_bid`` the stored ``time`` must stay the
        same between passes, while ``time_update`` must increase.
        """
        total = len(self.data_all)

        result = parent([(berlox.data_api_berlox, self.fetch_data_test_berlox),
                         (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
                         (parse_minfin.data_api_minfin, self.get_triple_data_test)], 'records')
        print(result)
        first_insert_time = datetime.now(tz=local_tz)
        self.assertEqual(result, (total, 0),
                         'wrong parent return, etalon is {} records'.format(total))
        self.assertEqual(result[0], records.find({}).count(),
                         'function reported = {}, docs in DB= {}'.format(result[0], records.find({}).count()))
        self.assertEqual(records.find({}).count(), total, 'should be {} in "records"'.format(total))
        self.assertEqual(data_active.find({}).count(), total, 'should be {} in "data_active"'.format(total))
        self.assertEqual(data_active.find({}).count(), records.find({}).count(),
                         'after first insert "records" and "data_active" should be equal')

        test_record_times = {}
        test_record_first_update_time = {}
        for spider, bid in self.test_bid.items():
            # one query is enough: nothing modifies the collection in between
            doc = data_active.find_one({'bid': bid})
            test_record_times[spider] = doc['time']
            self.assertLess(doc['time'], doc['time_update'],
                            'bid {bid} with time {time}, '
                            'but in DB update {time_update}'.format(bid=bid, time=doc['time'],
                                                                    time_update=doc['time_update']))
            test_record_first_update_time[spider] = doc['time_update']
        sleep(1)    # make sure the next update gets a later time_update

        # second insert of same data
        result = parent([(berlox.data_api_berlox, self.fetch_data_test_berlox),
                         (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
                         (parse_minfin.data_api_minfin, self.get_triple_data_test)], 'records')
        self.assertEqual(result, (total, 0),
                         'wrong parent return ={}, etalon is {} records'.format(result, total))
        for spider, bid in self.test_bid.items():
            doc = data_active.find_one({'bid': bid})
            self.assertEqual(doc['time'], test_record_times[spider], 'after UPDATE "time" should be same')
            self.assertGreater(doc['time_update'], test_record_first_update_time[spider],
                               'time_update should be changed in UPDATE ')

        self.assertEqual(data_active.find({}).count(), total, 'should be {} in "data_active"'.format(total))
        self.assertEqual(records.find({}).count(), total * 2, 'should be {} in "records"'.format(total * 2))
        self.assertLess(first_insert_time, data_active.find_one()['time_update'], 'time_update is not newer')

        # third insert of data with deleted records
        result = parent([(berlox.data_api_berlox, self.fetch_data_test_berlox_del),
                         (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA_del),
                         (parse_minfin.data_api_minfin, self.get_triple_data_test_del)], 'records')
        print(result)
        self.assertEqual(result, (total - 8, 8),
                         'wrong parent return, etalon is {} records'.format(total - 8))
        self.assertEqual(result[0], data_active.find({}).count(), 'function insert report wrong')
        self.assertEqual(data_active.find({}).count(), total - 8,
                         'should be {} in "data_active"'.format(total - 8))
        self.assertNotEqual(data_active.find({}).count(), records.find({}).count(),
                            'after insert "records" and "data_active" should not be equal')
        for spider, bid in self.test_bid.items():
            doc = data_active.find_one({'bid': bid})
            self.assertEqual(doc['time'], test_record_times[spider], 'after UPDATE "time" should be same')
            self.assertGreater(doc['time_update'], test_record_first_update_time[spider],
                               'time_update should be changed in UPDATE')
Beispiel #11
0
    def test_3parse_announcement_ovdp(self, requests_fun):
        """Verify the OVDP announcement parser and repeated DB insertion.

        Parsed documents have their ``time`` overwritten with the etalon
        timestamp and are compared with the fixture, in which documents whose
        ``time_auction`` falls on today's date receive the ``same_date``
        headline suffix and flag.  ``parent`` is then called twice for the
        ``'news'`` collection, expecting ``(n, 0)`` and then ``(0, n)``.

        ``requests_fun`` is unused in the body (presumably an injected mock
        -- confirm against the decorator that supplies it).
        """
        def with_etalon_time(doc):
            # replace today's date with the date from the etalon
            stamp = kyiv_tz.localize(datetime(2017, 3, 19, 19, 32, 34, 319882))
            return {**doc, 'time': stamp}

        def with_same_date_mark(doc):
            # only announcements auctioned today get the suffix and the flag
            if doc['time_auction'].date() != datetime.today().date():
                return doc
            return {
                **doc,
                'headline': doc['headline'] + ' in a same_date',
                'flag': 'same_date',
            }

        actual_docs = [with_etalon_time(doc) for doc in announcement_ovdp()]
        etalon_docs = [
            with_same_date_mark(doc) for doc in self.announcement_ovdp_data
        ]
        self.assertEqual(etalon_docs, actual_docs,
                         'error in parse_announcement_ovdp')

        # mongo tests
        current_time = datetime.utcnow()
        announce_href = 'http://www.minfin.gov.ua/uploads/redactor/files/48-52%20оголошення-сайт.doc'
        # (expected parent() return builder, message for the time check);
        # the second entry is the duplicate-insert check
        passes = (
            (lambda n: (n, 0), 'time should be same as insert data'),
            (lambda n: (0, n), 'time should be same as INSERT data'),
        )
        for expected_return, time_message in passes:
            mongo_result = parent([(minfin.announcement_ovdp, )], 'news')
            self.assertEqual(
                mongo_result,
                expected_return(len(self.announcement_ovdp_data)),
                'announcement_ovdp wrong return')
            self.assertEqual(news.find({}).count(),
                             len(self.announcement_ovdp_data),
                             'wrong news number in DB')
            sample = news.find_one({'href_announce': announce_href})
            auction_is_today = (
                datetime.today().date() == datetime(2017, 3, 21).date())
            expected_headline = ('OVDP 21.03.2017 in a same_date'
                                 if auction_is_today else 'OVDP 21.03.2017')
            self.assertEqual(sample['headline'], expected_headline,
                             'quick check NAME of doc')
            self.assertEqual(sample['time_auction'],
                             datetime(2017, 3, 21, 0, 0),
                             'time_auction should be same')
            self.assertAlmostEqual(sample['time'],
                                   current_time,
                                   delta=timedelta(seconds=1),
                                   msg=time_message)
Beispiel #12
0
    def test_mongo_data(self):
        """Run three spiders through ``parent`` three times and check the DB.

        Pass 1 inserts the fixtures, pass 2 repeats the same data and expects
        ``(0, 0)`` from ``parent`` while ``time_update`` moves forward, pass 3
        feeds the ``*_del`` fixtures and expects ``(0, 8)``.

        All checks use ``self.assert*`` so they are not stripped when Python
        runs with ``-O`` and so failures report the compared values.
        """
        # NOTE(review): Cursor.count() no longer exists in PyMongo 4 - confirm
        # the pinned driver version before upgrading.
        # NOTE(review): the 6 / 1 constants relate to the minfin cookie docs
        # named in the assertion message - confirm against the fixtures.
        expected_records = len(self.data_all) - 6 + 1

        result = parent(
            [(berlox.data_api_berlox, self.fetch_data_test_berlox),
             (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
             (parse_minfin.data_api_minfin, self.get_triple_data_test)],
            'records')
        print(result)
        print('self.data_all= ', len(self.data_all))
        first_insert_time = datetime.now(tz=local_tz)

        self.assertEqual(
            result, (expected_records, 0),
            '6 - docs with cookies from minfin in "record"; '
            'in "data_active" present only 1 doc with cookies')
        # TODO: could be a problem. Not all cookies are inserted in records,
        # while all cookies are inserted in data_active
        self.assertEqual(result[0],
                         records.find({}).count(),
                         'should present 1 doc with cookies')
        self.assertEqual(records.find({}).count(), expected_records)
        self.assertEqual(
            data_active.find({}).count(), len(self.data_all),
            'should be {} in "data_active"'.format(len(self.data_all)))
        self.assertEqual(
            data_active.find({}).count(),
            records.find({}).count() + 6 - 1)

        # Remember the stored timestamps of the probe bids after pass 1.
        test_record_times = {}
        test_record_first_update_time = {}
        for spider, bid in self.test_bid.items():
            stored = data_active.find_one({'bid': bid})
            # Capture the raw values before the doc goes through
            # date_depends_time, in case that helper modifies it.
            test_record_times[spider] = stored['time']
            test_record_first_update_time[spider] = stored['time_update']
            # fix date if time is greater than current
            # TODO: need check, if change need in code
            doc = date_depends_time(stored)
            self.assertLess(doc['time'], doc['time_update'])
        sleep(1)  # let the clock advance so the next time_update is newer

        def assert_probe_bids_updated():
            # After a re-run 'time' must be kept and 'time_update' must grow.
            for spider, bid in self.test_bid.items():
                doc = data_active.find_one({'bid': bid})
                self.assertEqual(doc['time'], test_record_times[spider],
                                 'after UPDATE "time" should be same')
                self.assertGreater(
                    doc['time_update'],
                    test_record_first_update_time[spider],
                    'time_update should be changed in UPDATE')

        # second insert of same data
        result = parent(
            [(berlox.data_api_berlox, self.fetch_data_test_berlox),
             (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
             (parse_minfin.data_api_minfin, self.get_triple_data_test)],
            'records')
        self.assertEqual(result, (0, 0))
        assert_probe_bids_updated()

        self.assertEqual(
            data_active.find({}).count(), len(self.data_all),
            'should be {} in "data_active"'.format(len(self.data_all)))
        self.assertEqual(records.find({}).count(), expected_records)
        self.assertLess(first_insert_time,
                        data_active.find_one()['time_update'],
                        'time_update is not newer')

        # third insert, data with deleted records
        result = parent(
            [(berlox.data_api_berlox, self.fetch_data_test_berlox_del),
             (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA_del),
             (parse_minfin.data_api_minfin, self.get_triple_data_test_del)],
            'records')
        print(result)
        self.assertEqual(result, (0, 8))
        self.assertNotEqual(
            data_active.find({}).count(), records.find({}).count(),
            'after insert "records" and "data_active" should not be equal')
        assert_probe_bids_updated()
Beispiel #13
0
    def test_insert(self):
        """Run ``data_api_finance_ua`` through ``parent`` three times.

        Pass 1 inserts the fixture, pass 2 repeats it (``records`` doubles,
        ``data_active`` keeps its size), pass 3 uses the ``*_del`` fixture and
        expects one doc fewer in ``data_active``.  After every repeat the probe
        bid must keep its ``time`` and get a newer ``time_update``.
        """
        # NOTE(review): Cursor.count() no longer exists in PyMongo 4 - confirm
        # the pinned driver version before upgrading.
        test_bid = 960072
        total = len(self.data)

        result = parent([
            (data_api_finance_ua, self.fetch_data_test),
        ], 'records')
        print(result)
        first_insert_time = datetime.now(tz=local_tz)
        assert result == (total, 0), \
            'wrong parent return, etalon is {} records'.format(total)
        assert result[0] == records.find({}).count(), \
            'function insert report wrong'
        assert records.find({}).count() == total, \
            'should be {} in "records"'.format(total)
        assert data_active.find({}).count() == total, \
            'should be {} in "data_active"'.format(total)
        assert data_active.find({}).count() == records.find({}).count(), \
            'after first insert "records" and "data_active" should be equal'

        # Remember the probe bid's timestamps as stored by the first insert.
        # NOTE(review): a `time < time_update` check was previously disabled
        # here; confirm whether it should hold.
        first_doc = data_active.find_one({'bid': test_bid})
        test_record_time = first_doc['time']
        test_record_first_update_time = first_doc['time_update']

        def assert_probe_bid_updated():
            # After a re-run 'time' must be kept and 'time_update' must grow.
            doc = data_active.find_one({'bid': test_bid})
            assert doc['time'] == test_record_time, \
                'after UPDATE "time" should be same'
            assert doc['time_update'] > test_record_first_update_time, \
                'time_update should be changed in UPDATE'

        # second insert of same data
        result = parent([
            (data_api_finance_ua, self.fetch_data_test),
        ], 'records')
        assert result == (total, 0), \
            'wrong parent return, etalon is {} records'.format(total)
        assert_probe_bid_updated()
        assert data_active.find({}).count() == total, \
            'should be {} in "data_active"'.format(total)
        assert records.find({}).count() == total * 2, \
            'should be {} in "records"'.format(total * 2)
        assert first_insert_time < data_active.find_one()['time_update'], \
            'time_update is not newer'

        # third insert, data with deleted records
        result = parent([
            (data_api_finance_ua, self.fetch_data_test_del),
        ], 'records')
        print(result)
        assert result == (total - 1, 1), \
            'wrong parent return, etalon is {} records'.format(total - 1)
        assert result[0] == data_active.find({}).count(), \
            'function insert report wrong'
        # The message names the collection that is actually being counted.
        assert data_active.find({}).count() == total - 1, \
            'should be {} in "data_active"'.format(total - 1)
        assert data_active.find({}).count() != records.find({}).count(), \
            'after insert "records" and "data_active" should not be equal'
        assert_probe_bid_updated()