def find_longest_length(driver, tag, n):
    """Return the largest "length" of the *tag* field across *n* API pages.

    Length means ``len()`` for ``str``/``dict`` values, the value itself for
    ``int``/``float``, and ``0`` for ``None``.

    Args:
        driver: browser handle passed through to ``beerproj.fetch_json``.
        tag: key to inspect in every returned beer record.
        n: number of 80-item pages to fetch.

    Returns:
        The maximum length observed (0 if nothing was measurable).
    """
    url_start = 'https://api.punkapi.com/v2/beers?page='
    url_end = '&per_page=80'
    max_length = 0
    for i in range(n):
        url = url_start + str(i + 1) + url_end
        temp_db_json = beerproj.fetch_json(driver, url)
        for x in temp_db_json:
            try:
                value = x[tag]
                # BUG FIX: the original tested `type(x[tag]) is None`, which
                # is always False (type() returns NoneType, never None), so
                # None values wrongly hit the "unexpected type" branch.
                if value is None:
                    len_mod = 0
                elif isinstance(value, str):
                    len_mod = len(value)
                # BUG FIX: the original wrote `type(x[tag]) is (int or float)`;
                # `(int or float)` evaluates to just `int`, so floats were
                # treated as unexpected. isinstance covers both numeric types.
                elif isinstance(value, (int, float)):
                    len_mod = value
                elif isinstance(value, dict):
                    len_mod = len(value)
                else:
                    print("there is unexpected type: ", type(value))
                    break
                if len_mod > max_length:
                    max_length = len_mod
            except Exception as e:
                # Log and continue with the next record (removed the dead
                # `pass` that preceded these prints in the original).
                print('Ooops..., the %s error happened!' % (e))
                print('Place of error: ', x['id'])
        # Throttle between page fetches (implicitly_wait is a driver-level
        # setting, so per-page vs. once has the same effect).
        driver.implicitly_wait(30)
    return max_length
def get_all_tag_value(driver, tag, n):
    """Collect every distinct value of *tag* seen across *n* API pages.

    Args:
        driver: browser handle passed through to ``beerproj.fetch_json``.
        tag: key to read from every returned beer record.
        n: number of 80-item pages to fetch.

    Returns:
        list: unique tag values in first-seen order.
    """
    base = 'https://api.punkapi.com/v2/beers?page='
    suffix = '&per_page=80'
    seen_values = []
    for page in range(1, n + 1):
        page_url = base + str(page) + suffix
        records = beerproj.fetch_json(driver, page_url)
        for record in records:
            try:
                candidate = record[tag]
            except Exception as err:
                print('Ooops..., the %s error happened!' % (err))
                print('Place of error: ', record['id'])
            else:
                # List membership (not a set) is deliberate: values may be
                # unhashable dicts, and insertion order is preserved.
                if candidate not in seen_values:
                    seen_values.append(candidate)
        driver.implicitly_wait(30)
    return seen_values
def get_max_tags(driver, n=6):
    """Return the union of all top-level JSON keys across *n* API pages.

    Args:
        driver: browser handle passed through to ``beerproj.fetch_json``.
        n: number of 80-item pages to scan (default 6, the original
           hard-coded value — now a parameter for consistency with the
           sibling functions, backward-compatible).

    Returns:
        list: unique keys in first-seen order, or None if fetching failed
        (the error is printed rather than re-raised).
    """
    # BUG FIX: pre-bind `x` so the except handler below cannot raise
    # NameError when the failure happens before the inner loop runs
    # (the original referenced `x` unconditionally in the handler).
    x = None
    try:
        url_start = 'https://api.punkapi.com/v2/beers?page='
        url_end = '&per_page=80'
        tag_list = []
        for i in range(n):
            url = url_start + str(i + 1) + url_end
            temp_db_json = beerproj.fetch_json(driver, url)
            for x in temp_db_json:
                for key_elem in x.keys():
                    if key_elem not in tag_list:
                        tag_list.append(key_elem)
            driver.implicitly_wait(30)
        return tag_list
    except Exception as e:
        print('Ooops..., the %s error happened!' % (e))
        print('Place of error: ', x)
def get_json_keys(driver, tag, n):
    """Gather every nested key under *tag* across *n* API pages.

    Delegates the per-record walk to ``get_dict_allkeys`` (defined
    elsewhere in this file), which returns (depth, keys).

    Args:
        driver: browser handle passed through to ``beerproj.fetch_json``.
        tag: key whose (nested) value is inspected in each record.
        n: number of 80-item pages to fetch.

    Returns:
        tuple: (list of unique nested keys in first-seen order,
                maximum nesting depth observed).
    """
    base = 'https://api.punkapi.com/v2/beers?page='
    suffix = '&per_page=80'
    deepest = 0
    collected_keys = []
    for page in range(1, n + 1):
        records = beerproj.fetch_json(driver, base + str(page) + suffix)
        for record in records:
            try:
                depth, keys_here = get_dict_allkeys(record[tag])
            except Exception as err:
                print('Ooops..., the %s error happened!' % (err))
                print('Place of error: ', record['id'])
            else:
                for key in keys_here:
                    if key not in collected_keys:
                        collected_keys.append(key)
                deepest = max(deepest, depth)
        driver.implicitly_wait(30)
    return collected_keys, deepest
'ibu', 'target_fg', 'target_og', 'ebc', 'srm', 'ph', 'attenuation_level', 'volume', 'boil_volume', 'method', 'ingredients', 'food_pairing', 'brewers_tips', 'contributed_by' ] # x indicates how many times you want to execute the random fetch x = 100 ctn = 0 # how many consecutive times that the returned value already been inserted into database ctn_non_execute = 0 db_prepare_json = [] saved_json_id = [] # the main part, # get non-repetative data for 100 times while ctn < x: temp_db_json = beerproj.fetch_json(driver_main, url)[0] # this try is to prevent less than 100 information left if ctn_non_execute <= 15: if temp_db_json['id'] not in saved_json_id: # if value not repeated, clean None/missing column values dirtyworks.find_loss_columns(temp_db_json) dirtyworks.kick_none(temp_db_json) dirtyworks.repair_dates(temp_db_json) db_prepare_json.append(temp_db_json) saved_json_id.append(temp_db_json['id']) # insert data into db add_tuple = [] for index, (key, value) in enumerate(temp_db_json.items()): add_tuple.append(json.dumps(value)) add_tuple[3] = add_tuple[3].strip('"') insert_query = """INSERT INTO beer_Brewdog VALUES