def test_1news_minfinUA(self, requests_fun):
    """Parse the minfin.com.ua headlines and store them twice in ``news``.

    The parser output is first compared with ``self.news_minfin_data``.
    Then ``parent`` is run twice with the same parser: the first call is
    expected to return ``(len(data), 0)`` and the second, duplicate call
    ``(0, len(data))``.  After each call the collection size and the
    stored ``time`` of one known document are verified.

    ``requests_fun`` is not referenced in the body.
    """
    self.assertEqual(parse_minfin_headlines(), self.news_minfin_data,
                     'wrong data after parser')

    total = len(self.news_minfin_data)
    sample_query = {'href': 'http://minfin.com.ua/2017/03/17/26771826/'}
    sample_time = datetime(2017, 3, 17, 17, 53)

    # (expected return of parent, failure message) for each pass;
    # the second entry is the duplicate insert.
    passes = (
        ((total, 0), 'parse_minfin_headlines wrong return'),
        ((0, total),
         'parse_minfin_headlines wrong return; error in duplicate insert'),
    )
    for expected, message in passes:
        outcome = parent([(news_minfin.parse_minfin_headlines,)], 'news')
        self.assertEqual(outcome, expected, message)
        self.assertEqual(news.find({}).count(), total,
                         'wrong news number in DB')
        stored = news.find_one(sample_query)
        self.assertEqual(stored['time'], sample_time,
                         'UTC time for doc error')
def test_2minfin_headlines(self, requests_fun):
    """Check the minfin.gov.ua headline parsers and their storage in ``news``.

    Each section URL is parsed on its own and compared with its fixture,
    then the combined parser is compared with ``self.minfin_headlines_all``.
    Afterwards ``parent`` is run twice: the first call is expected to
    return ``(len(all), 0)``, the duplicate call ``(0, len(all))``.

    ``requests_fun`` is not referenced in the body.
    """
    per_url_fixtures = (
        ('http://www.minfin.gov.ua/news/novini-ta-media',
         self.minfin_headlines_novini_ta_media_data),
        ('http://www.minfin.gov.ua/news/borg/zovnishni-suverenni-zobovjazannja',
         self.minfin_headlines_ZSZ_data),
    )
    for page_url, fixture in per_url_fixtures:
        self.assertEqual(fixture, minfin_headlines_url(page_url),
                         'error in parse_minfin_headlines')
    self.assertEqual(self.minfin_headlines_all, minfin_headlines(),
                     'error in parse_minfin_headlines')

    sample_href = (
        'http://www.minfin.gov.ua/news/view/'
        'ministerstvo-finansiv-ukrainy-zdiisnylo-vyplatu-kuponnoho-dokhodu-'
        'za-oblihatsiiamy-zovnishnoi-derzhavnoi-pozyky'
        '?category=borg&subcategory=zovnishni-suverenni-zobovjazannja'
    )
    sample_time = datetime(2017, 2, 28, 22, 0)

    def verify_stored(expected_total):
        # Collection size plus the stored time of one known document.
        self.assertEqual(news.find({}).count(), expected_total,
                         'wrong news number in DB')
        stored = news.find_one({'href': sample_href})
        self.assertEqual(stored['time'], sample_time,
                         'UTC time for doc error')

    # mongo tests
    outcome = parent([(minfin.minfin_headlines,)], 'news')
    total = len(self.minfin_headlines_all)
    parts_total = (len(self.minfin_headlines_novini_ta_media_data)
                   + len(self.minfin_headlines_ZSZ_data))
    self.assertEqual(
        total, parts_total,
        'simple check; sum of first two list should be eq. to third')
    self.assertEqual(outcome, (total, 0), 'minfin_headlines_all return')
    verify_stored(total)

    # mongo duplicate test
    outcome = parent([(minfin.minfin_headlines,)], 'news')
    self.assertEqual(outcome, (0, total),
                     'minfin_headlines_all wrong return')
    verify_stored(total)
def test_mongo_data(self):
    """Import the three spiders into Mongo three times and verify the state.

    Phases, in order:

    1. First import of the berlox, finance_ua and minfin fixtures.
    2. The same fixtures again after a one-second pause; ``parent`` is
       expected to return ``(0, 0)``, ``time`` of the tracked documents
       must stay the same and ``time_update`` must grow.
    3. The ``*_del`` fixtures; ``parent`` is expected to return ``(0, 8)``.

    NOTE(review): ``Cursor.count()`` is deprecated since PyMongo 3.7 and
    removed in 4.0 -- switch to ``count_documents`` when upgrading.
    """
    result = parent([(berlox.data_api_berlox, self.fetch_data_test_berlox),
                     (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
                     (parse_minfin.data_api_minfin, self.get_triple_data_test)],
                    'records')
    print(result)
    print('self.data_all= ', len(self.data_all))
    first_insert_time = datetime.now(tz=local_tz)

    total = len(self.data_all)
    # Per the assertion message below: 6 fixture docs carry minfin cookies
    # and only 1 of them ends up in "records".
    expected_records = total - 6 + 1

    # Message fixed: the two literals were concatenated without a space.
    self.assertEqual(result, (expected_records, 0),
                     '6 - docs with cookies from minfin in "record"; '
                     'in "data_active" present only 1 doc with cookies')
    # TODO: could be a problem. Not all cookies are inserted into records,
    # while all cookies are inserted into data_active.
    self.assertEqual(result[0], records.find({}).count(),
                     'should present 1 doc with cookies')
    self.assertEqual(records.find({}).count(), expected_records)
    self.assertEqual(data_active.find({}).count(), total,
                     'should be {} in "data_active"'.format(total))
    self.assertEqual(data_active.find({}).count(),
                     records.find({}).count() + 6 - 1)

    test_record_times = {}
    test_record_first_update_time = {}
    for spider, bid in self.test_bid.items():
        test_record_times[spider] = data_active.find_one({'bid': bid})['time']
        doc = data_active.find_one({'bid': bid})
        # Fix the date if "time" is greater than the current time.
        # TODO: check whether the same change is needed in the code.
        doc = date_depends_time(doc)
        self.assertLess(doc['time'], doc['time_update'])
        test_record_first_update_time[spider] = data_active.find_one(
            {'bid': bid})['time_update']

    # Presumably gives the next import a strictly later "time_update".
    sleep(1)

    # second insert of same data
    result = parent([(berlox.data_api_berlox, self.fetch_data_test_berlox),
                     (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
                     (parse_minfin.data_api_minfin, self.get_triple_data_test)],
                    'records')
    self.assertEqual(result, (0, 0))
    for spider, bid in self.test_bid.items():
        assert data_active.find_one({'bid': bid})['time'] == test_record_times[spider], \
            'after UPDATE "time" should be same'
        assert data_active.find_one({'bid': bid})['time_update'] > test_record_first_update_time[spider], \
            'time_update should be changed in UPDATE '
    assert data_active.find({}).count() == total, \
        'should be {} in "data_active"'.format(total)
    self.assertEqual(records.find({}).count(), expected_records)
    assert first_insert_time < data_active.find_one()['time_update'], \
        'time_update is not newer'

    # third insert: data with deleted records
    result = parent([(berlox.data_api_berlox, self.fetch_data_test_berlox_del),
                     (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA_del),
                     (parse_minfin.data_api_minfin, self.get_triple_data_test_del)],
                    'records')
    print(result)
    self.assertEqual(result, (0, 8))
    # Message fixed: missing space between the concatenated literals.
    assert data_active.find({}).count() != records.find({}).count(), \
        'after insert "records" and "data_active" should not be equal'
    for spider, bid in self.test_bid.items():
        assert data_active.find_one({'bid': bid})['time'] == test_record_times[spider], \
            'after UPDATE "time" should be same'
        assert data_active.find_one({'bid': bid})['time_update'] > test_record_first_update_time[spider], \
            'time_update should be changed in UPDATE'
def test_2minfin_headlines(self, requests_fun):
    """Verify minfin.gov.ua headline parsing and insertion into ``news``.

    Covers the two per-section parsers, the combined parser, a first
    ``parent`` run (expected return ``(len(all), 0)``) and a duplicate
    run (expected return ``(0, len(all))``).  One known document is
    looked up after each run to check its stored ``time``.

    ``requests_fun`` is not referenced in the body.
    """
    parser_error = 'error in parse_minfin_headlines'
    count_error = 'wrong news number in DB'
    time_error = 'UTC time for doc error'

    news_url = 'http://www.minfin.gov.ua/news/novini-ta-media'
    parsed_news = minfin_headlines_url(news_url)
    self.assertEqual(self.minfin_headlines_novini_ta_media_data, parsed_news,
                     parser_error)

    debt_url = 'http://www.minfin.gov.ua/news/borg/zovnishni-suverenni-zobovjazannja'
    parsed_debt = minfin_headlines_url(debt_url)
    self.assertEqual(self.minfin_headlines_ZSZ_data, parsed_debt,
                     parser_error)

    parsed_all = minfin_headlines()
    self.assertEqual(self.minfin_headlines_all, parsed_all, parser_error)

    known_doc = {
        'href': 'http://www.minfin.gov.ua/news/view/ministerstvo-finansiv-ukrainy-zdiisnylo-vyplatu-kuponnoho-dokhodu-za-oblihatsiiamy-zovnishnoi-derzhavnoi-pozyky?category=borg&subcategory=zovnishni-suverenni-zobovjazannja'
    }
    known_time = datetime(2017, 2, 28, 22, 0)

    # mongo tests
    first_run = parent([(minfin.minfin_headlines,)], 'news')
    all_count = len(self.minfin_headlines_all)
    news_count = len(self.minfin_headlines_novini_ta_media_data)
    debt_count = len(self.minfin_headlines_ZSZ_data)
    self.assertEqual(
        all_count, news_count + debt_count,
        'simple check; sum of first two list should be eq. to third')
    self.assertEqual(first_run, (all_count, 0), 'minfin_headlines_all return')
    in_db = news.find({}).count()
    self.assertEqual(in_db, all_count, count_error)
    fetched = news.find_one(known_doc)
    self.assertEqual(fetched['time'], known_time, time_error)

    # mongo duplicate test
    second_run = parent([(minfin.minfin_headlines,)], 'news')
    self.assertEqual(second_run, (0, all_count),
                     'minfin_headlines_all wrong return')
    in_db = news.find({}).count()
    self.assertEqual(in_db, all_count, count_error)
    fetched = news.find_one(known_doc)
    self.assertEqual(fetched['time'], known_time, time_error)
def test_1news_minfinUA(self, requests_fun):
    """Verify minfin.com.ua headline parsing and insertion into ``news``.

    Compares the parser output with ``self.news_minfin_data``, then runs
    ``parent`` twice.  The first run is expected to return
    ``(len(data), 0)``; the duplicate run ``(0, len(data))`` with the
    collection unchanged.

    ``requests_fun`` is not referenced in the body.
    """
    parsed = parse_minfin_headlines()
    self.assertEqual(parsed, self.news_minfin_data, 'wrong data after parser')

    count_error = 'wrong news number in DB'
    time_error = 'UTC time for doc error'
    known_doc = {'href': 'http://minfin.com.ua/2017/03/17/26771826/'}
    known_time = datetime(2017, 3, 17, 17, 53)

    first_run = parent([(news_minfin.parse_minfin_headlines,)], 'news')
    expected_total = len(self.news_minfin_data)
    self.assertEqual(first_run, (expected_total, 0),
                     'parse_minfin_headlines wrong return')
    in_db = news.find({}).count()
    self.assertEqual(in_db, expected_total, count_error)
    fetched = news.find_one(known_doc)
    self.assertEqual(fetched['time'], known_time, time_error)

    # duplicate insert
    second_run = parent([(news_minfin.parse_minfin_headlines,)], 'news')
    self.assertEqual(
        second_run, (0, expected_total),
        'parse_minfin_headlines wrong return; error in duplicate insert')
    in_db = news.find({}).count()
    self.assertEqual(in_db, expected_total, count_error)
    fetched = news.find_one(known_doc)
    self.assertEqual(fetched['time'], known_time, time_error)
def test_insert(self):
    """Import the minfin fixture three times and verify both collections.

    Phases, in order:

    1. First import: ``parent`` is expected to return ``(308, 0)``;
       "records" and "data_active" both hold 308 documents, 6 of them
       with ``session: True``.
    2. Same data again: ``parent`` is expected to return ``(308, 0)``;
       "records" doubles while "data_active" stays at 308; ``time`` of the
       tracked bid is unchanged and its ``time_update`` grows.
    3. The ``*_del`` fixture: ``parent`` is expected to return
       ``(302, 6)``; "data_active" shrinks to 302 documents.

    NOTE(review): ``Cursor.count()`` is deprecated since PyMongo 3.7 and
    removed in 4.0 -- switch to ``count_documents`` when upgrading.
    """
    total = 308               # documents in the full fixture ("etalon")
    total_after_delete = 302  # documents left after the *_del fixture
    deleted = 6
    cookies = 6               # documents matching {'session': True}
    tracked = {'bid': '32166583'}

    result = parent([(data_api_minfin, self.get_triple_data_test)], 'records')
    print(result)
    first_insert_time = datetime.now(tz=local_tz)
    assert result == (total, 0), \
        'wrong parent return, etalon is {} records'.format(total)
    assert result[0] == records.find({}).count(), 'function insert report wrong'
    assert records.find({}).count() == total, \
        'should be {} in "records"'.format(total)
    assert data_active.find({}).count() == total, \
        'should be {} in "data_active"'.format(total)
    # Message fixed: missing space between the concatenated literals.
    assert data_active.find({}).count() == records.find({}).count(), \
        'after first insert "records" and "data_active" should be equal'
    assert records.find({'session': True}).count() == cookies, \
        'should be {} cookies in "records"'.format(cookies)
    assert data_active.find({'session': True}).count() == cookies, \
        'should be {} cookies in "data_active"'.format(cookies)

    # One fetch supplies both timestamps of the tracked bid.
    doc = data_active.find_one(tracked)
    test_record_time = doc['time']
    assert doc['time'] < doc['time_update']
    test_record_first_update_time = doc['time_update']

    # second insert of same data
    result = parent([(data_api_minfin, self.get_triple_data_test)], 'records')
    assert result == (total, 0), \
        'wrong parent return, etalon is {} records'.format(total)
    assert data_active.find_one(tracked)['time'] == test_record_time, \
        'after UPDATE "time" should be same'
    assert data_active.find_one(tracked)['time_update'] > test_record_first_update_time, \
        'time_update should be changed in UPDATE'
    assert data_active.find({}).count() == total, \
        'should be {} in "data_active"'.format(total)
    assert records.find({}).count() == total * 2, \
        'should be {} in "records"'.format(total * 2)
    assert first_insert_time < data_active.find_one()['time_update'], \
        'time_update is not newer'

    # third insert: data with deleted records
    result = parent([(data_api_minfin, self.get_triple_data_test_del)], 'records')
    print(result)
    assert result == (total_after_delete, deleted), \
        'wrong parent return, etalon is {} records'.format(total_after_delete)
    assert result[0] == data_active.find({}).count(), 'function insert report wrong'
    # Message fixed: this check is on "data_active", not "records".
    assert data_active.find({}).count() == total_after_delete, \
        'should be {} in "data_active"'.format(total_after_delete)
    # Message fixed: missing space between the concatenated literals.
    assert data_active.find({}).count() != records.find({}).count(), \
        'after insert "records" and "data_active" should not be equal'
    assert data_active.find({'session': True}).count() == cookies, \
        'should be {} cookies in "data_active"'.format(cookies)
    assert data_active.find_one(tracked)['time'] == test_record_time, \
        'after UPDATE "time" should be same'
    assert data_active.find_one(tracked)['time_update'] > test_record_first_update_time, \
        'time_update should be changed in UPDATE'
def test_3parse_announcement_ovdp(self, requests_fun):
    """Check ``announcement_ovdp`` parsing and its storage in ``news``.

    The parsed documents get their ``time`` replaced with a fixed value
    before they are compared with the fixture.  Fixture documents whose
    ``time_auction`` falls on today's date are expected to carry the
    ``same_date`` flag and headline suffix.  ``parent`` is then run twice:
    first expecting ``(len(data), 0)``, then ``(0, len(data))``.

    ``requests_fun`` is not referenced in the body.
    """
    # replace the current date with the date from the reference data
    fixed_time = kyiv_tz.localize(datetime(2017, 3, 19, 19, 32, 34, 319882))
    parsed = [{**item, 'time': fixed_time} for item in announcement_ovdp()]

    expected_docs = []
    for item in self.announcement_ovdp_data:
        if item['time_auction'].date() == datetime.today().date():
            item = {**item,
                    'headline': item['headline'] + ' in a same_date',
                    'flag': 'same_date'}
        expected_docs.append(item)
    self.assertEqual(expected_docs, parsed, 'error in parse_announcement_ovdp')

    def verify_stored(time_message):
        # Collection size plus the fields of one known announcement.
        self.assertEqual(news.find({}).count(), len(self.announcement_ovdp_data),
                         'wrong news number in DB')
        stored = news.find_one({
            'href_announce': 'http://www.minfin.gov.ua/uploads/redactor/files/48-52%20оголошення-сайт.doc'
        })
        on_auction_day = datetime.today().date() == datetime(2017, 3, 21).date()
        headline = ('OVDP 21.03.2017 in a same_date' if on_auction_day
                    else 'OVDP 21.03.2017')
        self.assertEqual(stored['headline'], headline, 'quick check NAME of doc')
        self.assertEqual(stored['time_auction'], datetime(2017, 3, 21, 0, 0),
                         'time_auction should be same')
        self.assertAlmostEqual(stored['time'], current_time,
                               delta=timedelta(seconds=1), msg=time_message)

    # mongo tests
    current_time = datetime.utcnow()
    outcome = parent([(minfin.announcement_ovdp,)], 'news')
    self.assertEqual(outcome, (len(self.announcement_ovdp_data), 0),
                     'announcement_ovdp wrong return')
    verify_stored('time should be same as insert data')

    # mongo check duplicate
    outcome = parent([(minfin.announcement_ovdp,)], 'news')
    self.assertEqual(outcome, (0, len(self.announcement_ovdp_data)),
                     'announcement_ovdp wrong return')
    verify_stored('time should be same as INSERT data')
def test_insert(self):
    """Import the finance_ua fixture three times and verify both collections.

    Phases, in order:

    1. First import: ``parent`` is expected to return ``(len(data), 0)``
       and both collections hold ``len(data)`` documents.
    2. Same data again: ``parent`` is expected to return
       ``(len(data), 0)``; "records" doubles, "data_active" keeps its
       size; ``time`` of the tracked bid is unchanged and its
       ``time_update`` grows.
    3. The ``*_del`` fixture: ``parent`` is expected to return
       ``(len(data) - 1, 1)`` and "data_active" shrinks by one.

    NOTE(review): ``Cursor.count()`` is deprecated since PyMongo 3.7 and
    removed in 4.0 -- switch to ``count_documents`` when upgrading.
    """
    result = parent([(data_api_finance_ua, self.fetch_data_test)], 'records')
    print(result)
    first_insert_time = datetime.now(tz=local_tz)
    total = len(self.data)
    assert result == (total, 0), \
        'wrong parent return, etalon is {} records'.format(total)
    assert result[0] == records.find({}).count(), 'function insert report wrong'
    assert records.find({}).count() == total, \
        'should be {} in "records"'.format(total)
    assert data_active.find({}).count() == total, \
        'should be {} in "data_active"'.format(total)
    # Message fixed: missing space between the concatenated literals.
    assert data_active.find({}).count() == records.find({}).count(), \
        'after first insert "records" and "data_active" should be equal'

    test_bid = 960072
    # One fetch replaces four identical queries; the unused locals
    # bid_time / bid_time_update were dropped.
    # NOTE(review): the original kept a disabled check that
    # time < time_update for this bid -- decide whether to restore it.
    doc = data_active.find_one({'bid': test_bid})
    test_record_time = doc['time']
    test_record_first_update_time = doc['time_update']

    # second insert of same data
    result = parent([(data_api_finance_ua, self.fetch_data_test)], 'records')
    assert result == (total, 0), \
        'wrong parent return, etalon is {} records'.format(total)
    assert data_active.find_one({'bid': test_bid})['time'] == test_record_time, \
        'after UPDATE "time" should be same'
    assert data_active.find_one({'bid': test_bid})['time_update'] > test_record_first_update_time, \
        'time_update should be changed in UPDATE'
    assert data_active.find({}).count() == total, \
        'should be {} in "data_active"'.format(total)
    assert records.find({}).count() == total * 2, \
        'should be {} in "records"'.format(total * 2)
    assert first_insert_time < data_active.find_one()['time_update'], \
        'time_update is not newer'

    # third insert: data with deleted records
    result = parent([(data_api_finance_ua, self.fetch_data_test_del)], 'records')
    print(result)
    remaining = total - 1
    assert result == (remaining, 1), \
        'wrong parent return, etalon is {} records'.format(remaining)
    assert result[0] == data_active.find({}).count(), 'function insert report wrong'
    # Message fixed: this check is on "data_active", not "records".
    assert data_active.find({}).count() == remaining, \
        'should be {} in "data_active"'.format(remaining)
    # Message fixed: missing space between the concatenated literals.
    assert data_active.find({}).count() != records.find({}).count(), \
        'after insert "records" and "data_active" should not be equal'
    assert data_active.find_one({'bid': test_bid})['time'] == test_record_time, \
        'after UPDATE "time" should be same'
    assert data_active.find_one({'bid': test_bid})['time_update'] > test_record_first_update_time, \
        'time_update should be changed in UPDATE'
def test_mongo_data(self):
    """Import the three spiders into Mongo three times and verify the state.

    Phases, in order:

    1. First import of the berlox, finance_ua and minfin fixtures:
       ``parent`` is expected to return ``(len(data_all), 0)`` and both
       collections hold ``len(data_all)`` documents.
    2. The same fixtures again after a one-second pause: "records"
       doubles, "data_active" keeps its size; ``time`` of every tracked
       bid is unchanged and ``time_update`` grows.
    3. The ``*_del`` fixtures: ``parent`` is expected to return
       ``(len(data_all) - 8, 8)`` and "data_active" shrinks by eight.

    NOTE(review): ``Cursor.count()`` is deprecated since PyMongo 3.7 and
    removed in 4.0 -- switch to ``count_documents`` when upgrading.
    """
    result = parent(
        [(berlox.data_api_berlox, self.fetch_data_test_berlox),
         (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
         (parse_minfin.data_api_minfin, self.get_triple_data_test)],
        'records')
    print(result)
    first_insert_time = datetime.now(tz=local_tz)
    total = len(self.data_all)
    self.assertEqual(
        result, (total, 0),
        'wrong parent return, etalon is {} records'.format(total))
    self.assertEqual(
        result[0], records.find({}).count(),
        'function reported = {}, docs in DB= {}'.format(
            result[0], records.find({}).count()))
    assert records.find({}).count() == total, \
        'should be {} in "records"'.format(total)
    assert data_active.find({}).count() == total, \
        'should be {} in "data_active"'.format(total)
    assert data_active.find({}).count() == records.find({}).count(), \
        'after first insert "records" and "data_active" should be equal'

    test_record_times = {}
    test_record_first_update_time = {}
    for spider, bid in self.test_bid.items():
        # One fetch per bid supplies both timestamps.
        doc = data_active.find_one({'bid': bid})
        assert doc['time'] < doc['time_update'], \
            'bid {bid} with time {time}, but in DB update {time_update}'.format(
                bid=bid, time=doc['time'], time_update=doc['time_update'])
        test_record_times[spider] = doc['time']
        test_record_first_update_time[spider] = doc['time_update']

    # Presumably gives the next import a strictly later "time_update".
    sleep(1)

    # second insert of same data
    result = parent(
        [(berlox.data_api_berlox, self.fetch_data_test_berlox),
         (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
         (parse_minfin.data_api_minfin, self.get_triple_data_test)],
        'records')
    assert result == (total, 0), \
        'wrong parent return ={}, etalon is {} records'.format(result, total)
    for spider, bid in self.test_bid.items():
        assert data_active.find_one({'bid': bid})['time'] == test_record_times[spider], \
            'after UPDATE "time" should be same'
        assert data_active.find_one({'bid': bid})['time_update'] > test_record_first_update_time[spider], \
            'time_update should be changed in UPDATE '
    assert data_active.find({}).count() == total, \
        'should be {} in "data_active"'.format(total)
    assert records.find({}).count() == total * 2, \
        'should be {} in "records"'.format(total * 2)
    assert first_insert_time < data_active.find_one()['time_update'], \
        'time_update is not newer'

    # third insert: data with deleted records
    result = parent(
        [(berlox.data_api_berlox, self.fetch_data_test_berlox_del),
         (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA_del),
         (parse_minfin.data_api_minfin, self.get_triple_data_test_del)],
        'records')
    print(result)
    remaining = total - 8
    assert result == (remaining, 8), \
        'wrong parent return, etalon is {} records'.format(remaining)
    assert result[0] == data_active.find({}).count(), 'function insert report wrong'
    # Message fixed: this check is on "data_active", not "records".
    assert data_active.find({}).count() == remaining, \
        'should be {} in "data_active"'.format(remaining)
    # Message fixed: missing space between the concatenated literals.
    assert data_active.find({}).count() != records.find({}).count(), \
        'after insert "records" and "data_active" should not be equal'
    for spider, bid in self.test_bid.items():
        assert data_active.find_one({'bid': bid})['time'] == test_record_times[spider], \
            'after UPDATE "time" should be same'
        assert data_active.find_one({'bid': bid})['time_update'] > test_record_first_update_time[spider], \
            'time_update should be changed in UPDATE'
def test_mongo_data(self):
    """Run three ``parent`` imports of all spiders and check Mongo after each.

    1. Full fixtures: expected return ``(len(data_all), 0)``; "records"
       and "data_active" both contain ``len(data_all)`` documents.
    2. Full fixtures again, one second later: expected return
       ``(len(data_all), 0)``; "records" holds twice as many documents,
       "data_active" is unchanged in size, tracked bids keep ``time`` and
       get a larger ``time_update``.
    3. ``*_del`` fixtures: expected return ``(len(data_all) - 8, 8)``.

    NOTE(review): ``Cursor.count()`` is deprecated since PyMongo 3.7 and
    removed in 4.0 -- switch to ``count_documents`` when upgrading.
    """
    result = parent([(berlox.data_api_berlox, self.fetch_data_test_berlox),
                     (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
                     (parse_minfin.data_api_minfin, self.get_triple_data_test)],
                    'records')
    print(result)
    first_insert_time = datetime.now(tz=local_tz)
    expected = len(self.data_all)
    self.assertEqual(result, (expected, 0),
                     'wrong parent return, etalon is {} records'.format(expected))
    self.assertEqual(result[0], records.find({}).count(),
                     'function reported = {}, docs in DB= {}'.format(
                         result[0], records.find({}).count()))
    assert records.find({}).count() == expected, \
        'should be {} in "records"'.format(expected)
    assert data_active.find({}).count() == expected, \
        'should be {} in "data_active"'.format(expected)
    assert data_active.find({}).count() == records.find({}).count(), \
        'after first insert "records" and "data_active" should be equal'

    test_record_times = {}
    test_record_first_update_time = {}
    for spider, bid in self.test_bid.items():
        # A single query yields both timestamps of the tracked bid.
        doc = data_active.find_one({'bid': bid})
        assert doc['time'] < doc['time_update'], \
            'bid {bid} with time {time}, but in DB update {time_update}'.format(
                bid=bid, time=doc['time'], time_update=doc['time_update'])
        test_record_times[spider] = doc['time']
        test_record_first_update_time[spider] = doc['time_update']

    # Presumably gives the next import a strictly later "time_update".
    sleep(1)

    # second insert of same data
    result = parent([(berlox.data_api_berlox, self.fetch_data_test_berlox),
                     (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
                     (parse_minfin.data_api_minfin, self.get_triple_data_test)],
                    'records')
    assert result == (expected, 0), \
        'wrong parent return ={}, etalon is {} records'.format(result, expected)
    for spider, bid in self.test_bid.items():
        assert data_active.find_one({'bid': bid})['time'] == test_record_times[spider], \
            'after UPDATE "time" should be same'
        assert data_active.find_one({'bid': bid})['time_update'] > test_record_first_update_time[spider], \
            'time_update should be changed in UPDATE '
    assert data_active.find({}).count() == expected, \
        'should be {} in "data_active"'.format(expected)
    assert records.find({}).count() == expected * 2, \
        'should be {} in "records"'.format(expected * 2)
    assert first_insert_time < data_active.find_one()['time_update'], \
        'time_update is not newer'

    # third insert: data with deleted records
    result = parent([(berlox.data_api_berlox, self.fetch_data_test_berlox_del),
                     (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA_del),
                     (parse_minfin.data_api_minfin, self.get_triple_data_test_del)],
                    'records')
    print(result)
    after_delete = expected - 8
    assert result == (after_delete, 8), \
        'wrong parent return, etalon is {} records'.format(after_delete)
    assert result[0] == data_active.find({}).count(), 'function insert report wrong'
    # Message fixed: the count being checked belongs to "data_active".
    assert data_active.find({}).count() == after_delete, \
        'should be {} in "data_active"'.format(after_delete)
    # Message fixed: the concatenated literals were missing a space.
    assert data_active.find({}).count() != records.find({}).count(), \
        'after insert "records" and "data_active" should not be equal'
    for spider, bid in self.test_bid.items():
        assert data_active.find_one({'bid': bid})['time'] == test_record_times[spider], \
            'after UPDATE "time" should be same'
        assert data_active.find_one({'bid': bid})['time_update'] > test_record_first_update_time[spider], \
            'time_update should be changed in UPDATE'
def test_3parse_announcement_ovdp(self, requests_fun):
    """Verify ``announcement_ovdp`` output and its insertion into ``news``.

    Parsed documents are normalised to a fixed ``time`` and compared with
    the fixture, where entries auctioned today are expected to have the
    ``same_date`` flag and headline suffix.  Two ``parent`` runs follow:
    the first is expected to return ``(len(data), 0)``, the duplicate one
    ``(0, len(data))``; one known announcement is inspected after each.

    ``requests_fun`` is not referenced in the body.
    """
    # replace the current date with the date from the reference data
    reference_time = kyiv_tz.localize(datetime(2017, 3, 19, 19, 32, 34, 319882))
    parsed_docs = [dict(entry, time=reference_time)
                   for entry in announcement_ovdp()]

    def as_expected(entry):
        # Entries auctioned today get the "same_date" marker.
        if entry['time_auction'].date() != datetime.today().date():
            return entry
        return dict(entry,
                    headline=entry['headline'] + ' in a same_date',
                    flag='same_date')

    fixture_docs = [as_expected(entry) for entry in self.announcement_ovdp_data]
    self.assertEqual(fixture_docs, parsed_docs,
                     'error in parse_announcement_ovdp')

    # mongo tests
    current_time = datetime.utcnow()
    known_doc = {
        'href_announce': 'http://www.minfin.gov.ua/uploads/redactor/files/48-52%20оголошення-сайт.doc'
    }
    total = len(self.announcement_ovdp_data)
    # (expected return of parent, message for the "time" check) per pass;
    # the second pass is the duplicate check.
    passes = (
        ((total, 0), 'time should be same as insert data'),
        ((0, total), 'time should be same as INSERT data'),
    )
    for expected_result, time_message in passes:
        mongo_result = parent([(minfin.announcement_ovdp,)], 'news')
        self.assertEqual(mongo_result, expected_result,
                         'announcement_ovdp wrong return')
        self.assertEqual(news.find({}).count(), total,
                         'wrong news number in DB')
        stored = news.find_one(known_doc)
        if datetime.today().date() == datetime(2017, 3, 21).date():
            self.assertEqual(stored['headline'],
                             'OVDP 21.03.2017 in a same_date',
                             'quick check NAME of doc')
        else:
            self.assertEqual(stored['headline'], 'OVDP 21.03.2017',
                             'quick check NAME of doc')
        self.assertEqual(stored['time_auction'], datetime(2017, 3, 21, 0, 0),
                         'time_auction should be same')
        self.assertAlmostEqual(stored['time'], current_time,
                               delta=timedelta(seconds=1), msg=time_message)
def test_mongo_data(self):
    """Run three ``parent`` imports of all spiders and check Mongo after each.

    1. Full fixtures: ``parent`` is expected to return
       ``(len(data_all) - 6 + 1, 0)``; "data_active" holds
       ``len(data_all)`` documents and "records" five fewer.
    2. Full fixtures again, one second later: expected return ``(0, 0)``;
       collection sizes are unchanged, tracked bids keep ``time`` and get
       a larger ``time_update``.
    3. ``*_del`` fixtures: expected return ``(0, 8)``.

    NOTE(review): ``Cursor.count()`` is deprecated since PyMongo 3.7 and
    removed in 4.0 -- switch to ``count_documents`` when upgrading.
    """
    result = parent(
        [(berlox.data_api_berlox, self.fetch_data_test_berlox),
         (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
         (parse_minfin.data_api_minfin, self.get_triple_data_test)],
        'records')
    print(result)
    print('self.data_all= ', len(self.data_all))
    first_insert_time = datetime.now(tz=local_tz)

    active_total = len(self.data_all)
    # Per the assertion message below: 6 fixture docs carry minfin cookies
    # and only 1 of them ends up in "records".
    records_total = active_total - 6 + 1

    # Message fixed: the two literals were concatenated without a space.
    self.assertEqual(
        result, (records_total, 0),
        '6 - docs with cookies from minfin in "record"; '
        'in "data_active" present only 1 doc with cookies')
    # TODO: could be a problem. Not all cookies are inserted into records,
    # while all cookies are inserted into data_active.
    self.assertEqual(result[0], records.find({}).count(),
                     'should present 1 doc with cookies')
    self.assertEqual(records.find({}).count(), records_total)
    self.assertEqual(
        data_active.find({}).count(), active_total,
        'should be {} in "data_active"'.format(active_total))
    self.assertEqual(
        data_active.find({}).count(), records.find({}).count() + 6 - 1)

    test_record_times = {}
    test_record_first_update_time = {}
    for spider, bid in self.test_bid.items():
        test_record_times[spider] = data_active.find_one({'bid': bid})['time']
        doc = data_active.find_one({'bid': bid})
        # Fix the date if "time" is greater than the current time.
        # TODO: check whether the same change is needed in the code.
        doc = date_depends_time(doc)
        self.assertLess(doc['time'], doc['time_update'])
        test_record_first_update_time[spider] = data_active.find_one(
            {'bid': bid})['time_update']

    # Presumably gives the next import a strictly later "time_update".
    sleep(1)

    # second insert of same data
    result = parent(
        [(berlox.data_api_berlox, self.fetch_data_test_berlox),
         (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
         (parse_minfin.data_api_minfin, self.get_triple_data_test)],
        'records')
    self.assertEqual(result, (0, 0))
    for spider, bid in self.test_bid.items():
        assert data_active.find_one({'bid': bid})['time'] == test_record_times[spider], \
            'after UPDATE "time" should be same'
        assert data_active.find_one({'bid': bid})['time_update'] > test_record_first_update_time[spider], \
            'time_update should be changed in UPDATE '
    assert data_active.find({}).count() == active_total, \
        'should be {} in "data_active"'.format(active_total)
    self.assertEqual(records.find({}).count(), records_total)
    assert first_insert_time < data_active.find_one()['time_update'], \
        'time_update is not newer'

    # third insert: data with deleted records
    result = parent(
        [(berlox.data_api_berlox, self.fetch_data_test_berlox_del),
         (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA_del),
         (parse_minfin.data_api_minfin, self.get_triple_data_test_del)],
        'records')
    print(result)
    self.assertEqual(result, (0, 8))
    # Message fixed: the concatenated literals were missing a space.
    assert data_active.find({}).count() != records.find({}).count(), \
        'after insert "records" and "data_active" should not be equal'
    for spider, bid in self.test_bid.items():
        assert data_active.find_one({'bid': bid})['time'] == test_record_times[spider], \
            'after UPDATE "time" should be same'
        assert data_active.find_one({'bid': bid})['time_update'] > test_record_first_update_time[spider], \
            'time_update should be changed in UPDATE'
def test_insert(self):
    """Run three ``parent`` imports of the finance_ua fixture and check Mongo.

    1. Full fixture: expected return ``(len(data), 0)``; "records" and
       "data_active" both contain ``len(data)`` documents.
    2. Full fixture again: expected return ``(len(data), 0)``; "records"
       holds twice as many documents, "data_active" is unchanged in size,
       the tracked bid keeps ``time`` and gets a larger ``time_update``.
    3. ``*_del`` fixture: expected return ``(len(data) - 1, 1)``;
       "data_active" holds one document fewer.

    NOTE(review): ``Cursor.count()`` is deprecated since PyMongo 3.7 and
    removed in 4.0 -- switch to ``count_documents`` when upgrading.
    """
    result = parent([
        (data_api_finance_ua, self.fetch_data_test),
    ], 'records')
    print(result)
    first_insert_time = datetime.now(tz=local_tz)
    expected = len(self.data)
    assert result == (expected, 0), \
        'wrong parent return, etalon is {} records'.format(expected)
    assert result[0] == records.find({}).count(), 'function insert report wrong'
    assert records.find({}).count() == expected, \
        'should be {} in "records"'.format(expected)
    assert data_active.find({}).count() == expected, \
        'should be {} in "data_active"'.format(expected)
    # Message fixed: the concatenated literals were missing a space.
    assert data_active.find({}).count() == records.find({}).count(), \
        'after first insert "records" and "data_active" should be equal'

    test_bid = 960072
    # A single query replaces four identical ones; the unused locals
    # bid_time / bid_time_update were removed.
    # NOTE(review): the original kept a disabled check that
    # time < time_update for this bid -- decide whether to restore it.
    tracked = data_active.find_one({'bid': test_bid})
    test_record_time = tracked['time']
    test_record_first_update_time = tracked['time_update']

    # second insert of same data
    result = parent([
        (data_api_finance_ua, self.fetch_data_test),
    ], 'records')
    assert result == (expected, 0), \
        'wrong parent return, etalon is {} records'.format(expected)
    assert data_active.find_one({'bid': test_bid})['time'] == test_record_time, \
        'after UPDATE "time" should be same'
    assert data_active.find_one({'bid': test_bid})['time_update'] > test_record_first_update_time, \
        'time_update should be changed in UPDATE'
    assert data_active.find({}).count() == expected, \
        'should be {} in "data_active"'.format(expected)
    assert records.find({}).count() == expected * 2, \
        'should be {} in "records"'.format(expected * 2)
    assert first_insert_time < data_active.find_one()['time_update'], \
        'time_update is not newer'

    # third insert: data with deleted records
    result = parent([
        (data_api_finance_ua, self.fetch_data_test_del),
    ], 'records')
    print(result)
    after_delete = expected - 1
    assert result == (after_delete, 1), \
        'wrong parent return, etalon is {} records'.format(after_delete)
    assert result[0] == data_active.find({}).count(), 'function insert report wrong'
    # Message fixed: the count being checked belongs to "data_active".
    assert data_active.find({}).count() == after_delete, \
        'should be {} in "data_active"'.format(after_delete)
    # Message fixed: the concatenated literals were missing a space.
    assert data_active.find({}).count() != records.find({}).count(), \
        'after insert "records" and "data_active" should not be equal'
    assert data_active.find_one({'bid': test_bid})['time'] == test_record_time, \
        'after UPDATE "time" should be same'
    assert data_active.find_one({'bid': test_bid})['time_update'] > test_record_first_update_time, \
        'time_update should be changed in UPDATE'