def test_db_empty():
    helper.set_env({
        'SCRAPED_ADVERTS_TABLE': 'scraped',
        'FILTERED_ADVERTS_TABLE': 'filtered'
    })
    db = boto3.resource('dynamodb')
    helper.create_table(db, 'scraped', 'title_hash', 'S')
    filtered_table = helper.create_table(db, 'filtered', 'title_hash', 'S')

    lambda_handler.handle(None, None)

    # Nothing was scraped, so nothing should have been filtered
    filtered_res = filtered_table.scan()
    assert len(filtered_res['Items']) == 0
def test_db_not_empty():
    helper.set_env({
        'SCRAPED_ADVERTS_TABLE': 'scraped',
        'FILTERED_ADVERTS_TABLE': 'filtered'
    })
    db = boto3.resource('dynamodb')
    scraped_table = helper.create_table(db, 'scraped', 'title_hash', 'S')
    filtered_table = helper.create_table(db, 'filtered', 'title_hash', 'S')

    # Seed the scraped table with unprocessed test items
    with open('data_files/test_items.json', 'r') as f:
        test_items = json.load(f)
    for item in test_items['items']:
        item['processed'] = False
        scraped_table.put_item(Item=item)

    lambda_handler.handle(None, None)

    filtered_res = filtered_table.scan()
    assert len(filtered_res['Items']) > 0
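# The tests above lean on a small `helper` module that is not shown in this
# snippet. A minimal sketch of what it might look like, assuming the tests
# run against a mocked DynamoDB (e.g. moto) and that `create_table` builds a
# single-hash-key table; the names and signatures below are inferred from the
# call sites, not taken from the source:
import os

def set_env(env_vars):
    # Expose the table names to the handler under test
    os.environ.update(env_vars)

def create_table(db, name, key_name, key_type):
    # Create a table keyed on a single hash attribute
    return db.create_table(
        TableName=name,
        KeySchema=[{'AttributeName': key_name, 'KeyType': 'HASH'}],
        AttributeDefinitions=[{'AttributeName': key_name,
                               'AttributeType': key_type}],
        BillingMode='PAY_PER_REQUEST')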
def test_db_cleaner_all_to_clear():
    helper.set_env({'SCRAPED_ADVERTS_TABLE': 'scraped'})
    db = boto3.resource('dynamodb')
    table = helper.create_table(db, 'scraped', 'title_hash', 'S')

    # Stamp every item 16 days in the past so all of them fall outside
    # the retention window and should be cleared
    expired_time = int(time.time()) - (16 * 24 * 60 * 60)
    populate_table(table, expired_time, expired_time)

    lambda_handler.handle(None, None)

    items = table.scan()
    assert len(items['Items']) == 0
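# `populate_table` is not defined in this snippet either. A plausible sketch,
# assuming it writes a batch of items stamped with the two timestamps (the
# attribute names `date_added` and `last_seen` are guesses, not taken from
# the source):
def populate_table(table, date_added, last_seen):
    for i in range(10):
        table.put_item(Item={
            'title_hash': 'hash-{}'.format(i),
            'date_added': date_added,
            'last_seen': last_seen,
        })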
def _set_output(self, data):
    import tables

    # If already set, do nothing
    if self._output is not None:
        return

    self._output_format = Format.h5
    filters = data.filters
    node_path = data._v_pathname
    description = data.description
    self._output = tables.open_file(self._output_name, 'w', filters=filters)
    self._payload = create_table(self._output, node_path, description)
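# Here `create_table` is a free function rather than the PyTables method. A
# hypothetical sketch, assuming it simply recreates the source node's table
# at the same path in the output file:
def create_table(h5file, node_path, description):
    where, name = node_path.rsplit('/', 1)
    # createparents recreates any intermediate groups along the node path
    return h5file.create_table(where or '/', name, description,
                               createparents=True)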
def test_adverts_controller_get_valid_pages():
    helper.set_env({'FILTERED_ADVERTS_TABLE': 'filtered'})
    populate_table(
        helper.create_table(boto3.resource('dynamodb'), 'filtered',
                            'title_hash', 'S'))

    # Each valid page should match its pre-recorded fixture
    for i in range(5):
        response = lambda_handler.handle(
            {'queryStringParameters': {'page': str(i)}}, None)
        assert response is not None

        response_items = json.loads(response['body'])
        with open('data_files/filtered_adverts_page_{}.json'.format(i),
                  'r') as f:
            test_response_items = json.load(f)
        assert response_items == test_response_items
def test_adverts_get_non_existing_page():
    helper.set_env({'FILTERED_ADVERTS_TABLE': 'filtered'})
    populate_table(
        helper.create_table(boto3.resource('dynamodb'), 'filtered',
                            'title_hash', 'S'))

    # A negative page number yields an empty item list
    response = lambda_handler.handle(
        {'queryStringParameters': {'page': '-1'}}, None)
    assert response is not None
    assert response['body'] == ('{"items": [], "page": -1, '
                                '"number_of_pages": 5, "count": 112, '
                                '"page_count": 0}')

    # A page past the last one also yields an empty item list
    response = lambda_handler.handle(
        {'queryStringParameters': {'page': '1000'}}, None)
    assert response is not None
    assert response['body'] == ('{"items": [], "page": 1000, '
                                '"number_of_pages": 5, "count": 112, '
                                '"page_count": 0}')
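# Comparing raw JSON strings ties the test to the serializer's key order. A
# sketch of a less brittle variant under the same assumptions, comparing the
# parsed body instead:
body = json.loads(response['body'])
assert body == {'items': [], 'page': 1000, 'number_of_pages': 5,
                'count': 112, 'page_count': 0}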
# Go through each of the links and parse them
count = 1
tot = len(to_parse)
for url in to_parse:
    # Get the appropriate fields of the URL
    holders = url.split('/')
    COUNTRY = holders[6]
    holders.pop(7)

    # Create the temporary URL and base location
    temp_url = F1 + '/'.join(holders) + '/'
    base_loc = 'data/' + YEAR + '/' + COUNTRY + '/'
    print('\n=====', COUNTRY.upper(), '=====', count, 'out of', tot)

    # Get and write PRACTICE sessions
    write_file(create_table(temp_url + PRACTICE1),
               base_loc + PRACTICE1.replace('.html', '.txt'))
    write_file(create_table(temp_url + PRACTICE2),
               base_loc + PRACTICE2.replace('.html', '.txt'))
    write_file(create_table(temp_url + PRACTICE3),
               base_loc + PRACTICE3.replace('.html', '.txt'))

    # Get and write QUALIFYING data
    write_file(create_table(temp_url + QUALIFYING),
               base_loc + QUALIFYING.replace('.html', '.txt'))

    # Get and write RACE data
    write_file(create_table(temp_url + STARTING),
               base_loc + STARTING.replace('.html', '.txt'))
    write_file(create_table(temp_url + FASTEST),
               base_loc + FASTEST.replace('.html', '.txt'))
    write_file(create_table(temp_url + RACE),
               base_loc + RACE.replace('.html', '.txt'))

    # Advance the progress counter for the next link
    count += 1
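# `create_table` and `write_file` are defined elsewhere in this script. A
# hypothetical sketch of their shapes, assuming `create_table` scrapes the
# first HTML table at a URL into plain text (pandas is one way to do that,
# not necessarily the one the source uses):
import os
import pandas as pd

def create_table(url):
    # Fetch the first HTML table at the URL and render it as text
    return pd.read_html(url)[0].to_string(index=False)

def write_file(contents, path):
    # Create the target directory if needed, then write the table text
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'w') as f:
        f.write(contents)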