Example #1
0
 def parse(self, response):
     """Yield one item per entry of the daily time series in the response.

     The response body is decoded as JSON. The first match of the JSONPath
     ``$..[Time Series (Daily)]`` supplies the keys to iterate over, and the
     third match of ``$.[Meta Data]..`` is used as the stock name.

     :param response: response object exposing ``body_as_unicode()``
     :return: generator of ``Project1568Item`` objects, one per key
     """
     # NOTE(review): newer Scrapy releases expose the decoded body as
     # ``response.text``; confirm which API the installed version supports.
     jsonresponses = json.loads(response.body_as_unicode())
     dicts = jp(jsonresponses, "$..[Time Series (Daily)]")[0]
     stockname = jp(jsonresponses, "$.[Meta Data]..")[2]
     for key in dicts:
         # Run the JSONPath query once per key; every field below is read
         # from the same result list by position.
         # Presumably position 0 is the entry itself and 1-5 are its values
         # in document order -- TODO confirm against the feed.
         fields = jp(jsonresponses, "$.." + str(key) + "..")
         # Build a fresh item per row. Re-yielding one shared instance means
         # anything that holds on to earlier items sees them overwritten by
         # later rows.
         item = Project1568Item()
         item['open'] = fields[1]
         item['high'] = fields[2]
         item['low'] = fields[3]
         item['close'] = fields[4]
         item['volume'] = fields[5]
         item['_id'] = key
         item['stockname'] = stockname
         yield item
Example #2
0
    def parse(self, response):
        """Yield items for time-series entries not yet stored in MongoDB.

        The response body is decoded as JSON. The first match of the JSONPath
        ``$..[Time Series (1min)]`` supplies the keys to iterate over, and the
        third match of ``$.[Meta Data]..`` is used as the stock name. Keys are
        looked up by ``_id`` in the ``<stockname>_real_time`` collection of the
        ``stock`` database; iteration stops at the first key that is already
        present.

        :param response: response object exposing ``body_as_unicode()``
        :return: generator of ``Project1568Item`` objects, one per new key
        """
        # NOTE(review): newer Scrapy releases expose the decoded body as
        # ``response.text``; confirm which API the installed version supports.
        jsonresponses = json.loads(response.body_as_unicode())
        dicts = jp(jsonresponses, "$..[Time Series (1min)]")[0]
        stockname = jp(jsonresponses, "$.[Meta Data]..")[2]
        myclient = pymongo.MongoClient()
        try:
            mycol = myclient["stock"][stockname + "_real_time"]
            for key in dicts:
                if mycol.find_one({"_id": key}) is not None:
                    # First key that is already stored: stop here.
                    # Presumably keys arrive newest-first, so everything
                    # after this one is already stored -- TODO confirm.
                    break
                # Run the JSONPath query once per key; every field below is
                # read from the same result list by position.
                fields = jp(jsonresponses, "$.." + str(key) + "..")
                # Build a fresh item per row so earlier yielded items are
                # not overwritten by later rows.
                item = Project1568Item()
                item['open'] = fields[1]
                item['high'] = fields[2]
                item['low'] = fields[3]
                item['close'] = fields[4]
                item['volume'] = fields[5]
                item['_id'] = key
                item['stockname'] = stockname
                yield item
        finally:
            # Release the connection whether the generator is exhausted,
            # closed early, or fails.
            myclient.close()
def parse_json(json_data):
    """
    Parse the JSON body of an API call into a blueprint of tables.

    The decoded document is flattened into dotted keys with
    ``flatten(data, separator='.')``. Each dotted key is then read as
    ``[...parent.]table.column`` or, when the second-to-last segment is
    numeric, as ``[...parent.]table.row_index.column``. Keys without a dot
    (top-level scalars) belong to no table and are skipped.

    :param json_data: JSON text passed in the API call
    :return: a dictionary consisting of the following:-
             1. ``data_type_data``: Python type of each column value per
                table (a dict for plain tables, a list of per-row dicts for
                list tables)
             2. ``tables_list``: set of table names
             3. ``table_column``: set of column names per table
             4. ``table_values``: column values per table, shaped like
                ``data_type_data``
             5. ``parent_child_tables``: set of parent segment names per
                table; an empty set means the table is a root table
    """
    data = json.loads(json_data)
    flattened_data = flatten(data, separator='.')
    data_type_data = {}
    tables_list = set()
    table_column = {}
    table_values = {}
    parent_child_tables = {}
    for key, value in flattened_data.items():
        if '.' not in key:
            continue
        parts = key.split('.')
        column = parts[-1]
        # A numeric second-to-last segment marks a list element. At least
        # three segments are required so that a table name exists in front
        # of the index.
        is_list = len(parts) >= 3 and parts[-2].isnumeric()
        table = parts[-3] if is_list else parts[-2]

        tables_list.add(table)
        table_column.setdefault(table, set()).add(column)

        if is_list:
            row = int(parts[-2])
            rows = table_values.setdefault(table, [])
            row_types = data_type_data.setdefault(table, [])
            # Grow on demand with a distinct dict per row. ``[{}] * n``
            # would alias one dict n times, so every row would show the
            # values of the last row written.
            while len(rows) <= row:
                rows.append({})
                row_types.append({})
            rows[row][column] = value
            row_types[row][column] = type(value)
        else:
            table_values.setdefault(table, {})[column] = value
            data_type_data.setdefault(table, {})[column] = type(value)

        # The parent is the segment just before the table name. It is
        # absent when the table sits at the root of the document.
        parents = parent_child_tables.setdefault(table, set())
        parent_depth = 4 if is_list else 3
        if len(parts) >= parent_depth:
            parents.add(parts[-parent_depth])
    return {
            'data_type_data': data_type_data,
            'tables_list': tables_list,
            'table_column': table_column,
            'table_values': table_values,
            'parent_child_tables': parent_child_tables
            }
Example #4
0
from jsonpath import jsonpath as jp
import requests
from fake_useragent import UserAgent
import json

url = "https://www.lagou.com/lbs/getAllCitySearchLabels.json"
headers = {"User-Agent": UserAgent().random}

# Bound the wait: without a timeout the request can block indefinitely when
# the server stops responding.
response = requests.get(url, headers=headers, timeout=10)
# Decode the body once and run both queries against the same object.
payload = response.json()
# Both expressions start with "$.." so they match the field at any depth.
names = jp(payload, '$..name')
codes = jp(payload, '$..code')
print(names)
print(codes)
Example #5
0
import requests
import json
from jsonpath import jsonpath as jp
# Paged endpoint: ``start`` is filled in per request, 20 entries per page.
base_url = 'https://movie.douban.com/j/chart/top_list?type=13&interval_id=100%3A90&action=&start={}&limit=20'

headers = {
    "User-Agent":
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36'
}
i = 0
while True:
    url = base_url.format(i * 20)
    # Bound the wait so a stalled connection cannot hang the script.
    response = requests.get(url, headers=headers, timeout=10)
    data = response.json()
    # Stop on an empty page. Also stop on any payload that is not a list
    # (for example an error object); otherwise the loop would keep
    # requesting pages forever.
    if not data or not isinstance(data, list):
        break
    Watch_titles = jp(data, '$..[title]')
    Watch_stars = jp(data, '$..[actors]')
    # ensure_ascii=False keeps non-ASCII text readable in the output.
    d = json.dumps(Watch_titles, ensure_ascii=False)
    s = json.dumps(Watch_stars, ensure_ascii=False)
    print(d)
    print(s)
    i += 1