def parse(self, response):
    """Parse a daily time-series JSON response into one item per trading day.

    Extracts the "Time Series (Daily)" mapping and the stock name from
    "Meta Data", then yields a Project1568Item per date key carrying
    open/high/low/close/volume, with the date string as ``_id``.

    :param response: Scrapy response whose body is the JSON payload
    :return: generator of Project1568Item
    """
    jsonresponses = json.loads(response.body_as_unicode())
    dicts = jp(jsonresponses, "$..[Time Series (Daily)]")[0]
    stockname = jp(jsonresponses, "$.[Meta Data]..")[2]
    for key in dicts.keys():
        # Run the jsonpath query once per key; the original repeated the
        # identical query five times, once per field.
        values = jp(jsonresponses, "$.." + str(key) + "..")
        # BUG FIX: build a fresh item every iteration. The original
        # created ONE item before the loop and mutated+yielded it each
        # time, so every yielded item aliased the same object and ended
        # up holding the values of the last key processed.
        item = Project1568Item()
        # Indices 1..5 follow the API's field order — TODO confirm
        # against the Alpha Vantage "Time Series (Daily)" schema.
        item['open'] = values[1]
        item['high'] = values[2]
        item['low'] = values[3]
        item['close'] = values[4]
        item['volume'] = values[5]
        item['_id'] = key
        item['stockname'] = stockname
        yield item
def parse(self, response):
    """Parse a 1-minute (real-time) time-series JSON response.

    Yields one Project1568Item per timestamp not yet stored in the
    ``stock.<name>_real_time`` MongoDB collection. Iteration stops at the
    first timestamp already present (assumes the JSON keys arrive
    newest-first, so everything after it is already stored — TODO
    confirm against the feed's key ordering).

    :param response: Scrapy response whose body is the JSON payload
    :return: generator of Project1568Item
    """
    jsonresponses = json.loads(response.body_as_unicode())
    dicts = jp(jsonresponses, "$..[Time Series (1min)]")[0]
    stockname = jp(jsonresponses, "$.[Meta Data]..")[2]
    myclient = pymongo.MongoClient()
    try:
        mycol = myclient["stock"][stockname + "_real_time"]
        for key in dicts.keys():
            if mycol.find_one({"_id": key}) is not None:
                # Already persisted; stop scanning (original behavior).
                break
            # Run the jsonpath query once per key; the original repeated
            # the identical query five times, once per field.
            values = jp(jsonresponses, "$.." + str(key) + "..")
            # BUG FIX: fresh item per iteration — the original mutated
            # and yielded a single shared item, so every yielded item
            # aliased the same object.
            item = Project1568Item()
            item['open'] = values[1]
            item['high'] = values[2]
            item['low'] = values[3]
            item['close'] = values[4]
            item['volume'] = values[5]
            item['_id'] = key
            item['stockname'] = stockname
            yield item
    finally:
        # BUG FIX: the original never closed the client, leaking a Mongo
        # connection pool on every parsed response.
        myclient.close()
def parse_json(json_data):
    """
    Parse the given JSON body from an API call into a table blueprint.

    The blueprint aids in creating and loading relational tables from
    nested JSON.

    :param json_data: JSON string passed in the API call
    :return: a dictionary consisting of:
        1. ``data_type_data`` — data type of the columns for each table
        2. ``tables_list`` — set of table names
        3. ``table_column`` — columns available in each table
        4. ``table_values`` — values of each column for each table
        5. ``parent_child_tables`` — parent table(s) of each table;
           a table mapped to an empty set is a root table
    """
    data = json.loads(json_data)
    flattened_data = flatten(data, separator='.')
    data_type_data = {}
    tables_list = set()
    table_column = {}
    table_values = {}
    parent_child_tables = {}
    for key, value in flattened_data.items():
        # Keys without a '.' are top-level scalars, not table members.
        if '.' not in key:
            continue
        parts = key.split('.')
        # A numeric second-to-last segment means the value sits inside a
        # JSON array, so the table name is one segment further left.
        if not parts[-2].isnumeric():
            tables = parts[-2]
            is_list = False
        else:
            tables = parts[-3]
            is_list = True
        tables_list.add(tables)
        # Enlist columns for each table.
        if not table_column.get(tables):
            table_column[tables] = set()
        table_column[tables].add(parts[-1])
        # Enlist the values for each column and their data types.
        if not table_values.get(tables):
            if not is_list:
                table_values[tables] = {}
                data_type_data[tables] = {}
            else:
                n = len(jp(data, '$.{key}'.format(key='.'.join(parts[0:-3]))))
                # BUG FIX: the original used ``[{}] * n``, which creates
                # n references to ONE shared dict, so every indexed
                # ``.update()`` below wrote into all rows at once. Build
                # n distinct dicts instead.
                table_values[tables] = [{} for _ in range(n)]
                data_type_data[tables] = [{} for _ in range(n)]
        if not is_list:
            table_values[tables].update({parts[-1]: value})
            data_type_data[tables].update({parts[-1]: type(value)})
        else:
            idx = int(parts[-2])
            table_values[tables][idx].update({parts[-1]: value})
            data_type_data[tables][idx].update({parts[-1]: type(value)})
        # Relation tables have with each other.
        # Note: if a table has set() against it, it is a root table.
        if not parent_child_tables.get(tables):
            parent_child_tables[tables] = set()
        try:
            if not is_list:
                parent_child_tables[tables].add(parts[-3])
            else:
                parent_child_tables[tables].add(parts[-4])
        except IndexError:
            # Root-level tables have no parent segment in the key;
            # narrowed from a bare ``except`` — indexing is the only
            # statement here that can raise.
            pass
    return {
        'data_type_data': data_type_data,
        'tables_list': tables_list,
        'table_column': table_column,
        'table_values': table_values,
        'parent_child_tables': parent_child_tables
    }
from jsonpath import jsonpath as jp
import requests
from fake_useragent import UserAgent
import json

# Fetch the lagou.com city-search labels and extract every city name and
# code with jsonpath's recursive-descent ``$..`` operator.
url = "https://www.lagou.com/lbs/getAllCitySearchLabels.json"
headers = {"User-Agent": UserAgent().random}
response = requests.get(url, headers=headers)
# Decode the body once and query it twice. The original parsed the same
# response body twice (``json.loads(response.text)`` AND
# ``response.json()``), doing identical work two times.
data = response.json()
names = jp(data, '$..name')
codes = jp(data, '$..code')
print(names)
print(codes)
import requests
import json
from jsonpath import jsonpath as jp

# Page through douban's top-list API (20 entries per request) and print
# the titles and actor lists of each page until an empty page is hit.
base_url = 'https://movie.douban.com/j/chart/top_list?type=13&interval_id=100%3A90&action=&start={}&limit=20'
headers = {
    "User-Agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36'
}

page = 0
while True:
    response = requests.get(base_url.format(page * 20), headers=headers)
    payload = response.json()
    # An empty (or missing) page means we've walked past the last entry.
    if payload is None or payload == []:
        break
    titles = jp(payload, '$..[title]')
    actors = jp(payload, '$..[actors]')
    print(json.dumps(titles, ensure_ascii=False))
    print(json.dumps(actors, ensure_ascii=False))
    page += 1