def json_test_5() -> bool:
    """Parse ``./jsontest/eg5.json`` into a ``One3`` (multi-level list case).

    Returns:
        True when ``selfusepy.parse_json`` hands back a ``One3`` instance.
    """
    print('多级list测试')
    # The context manager closes the file even if reading fails.
    with open('./jsontest/eg5.json', 'r') as f:
        content = f.read()
    obj: One3 = selfusepy.parse_json(content, One3())
    print(obj)
    return isinstance(obj, One3)
def is_req_danmaku(aid: int, cid_len: int) -> (bool, int):
    """Decide whether danmaku for ``aid`` may be requested and count the request.

    The per-aid counter lives in the ``red`` store under ``'<aid>-req-times'``
    as a JSON-serialised ``ReqTimes``.

    Args:
        aid: id of the av whose danmaku would be requested.
        cid_len: number of cids the av has.

    Returns:
        ``(allowed, times)``. ``(True, -1)`` when ``cid_len`` is 1 (no counter
        is touched); ``(True, 1)`` when the counter is created; otherwise
        ``(False, current)`` once ``is_out_of_max_req_times`` says the limit is
        reached, or ``(True, incremented)`` after the counter is bumped.
    """
    if cid_len == 1:
        return True, -1

    key = '%s-req-times' % aid
    res = red.get(key)

    if not res:
        # No counter yet: initialise it with a first request.
        red.set(key, json.dumps(ReqTimes(cid_len, 1).__dict__, default=int))
        expire_policy(key, cid_len)
        return True, 1

    req: ReqTimes = ReqTimes()
    try:
        req = selfusepy.parse_json(str(res, encoding='utf-8'), ReqTimes())
    except Exception:
        # Compatibility with the v0.0.5 req-times format (a bare integer);
        # can be removed after a while. Only ordinary errors are treated as
        # "legacy format" -- KeyboardInterrupt/SystemExit must propagate.
        req.cid_len = cid_len
        req.req_times = int(res)

    if is_out_of_max_req_times(cid_len, req.req_times):
        return False, req.req_times

    req.req_times += 1
    red.set(key, json.dumps(req.__dict__, default=int))
    expire_policy(key, cid_len)
    return True, req.req_times
def processing_data(j: str, get_data_time: datetime):
    """Persist the top-AV list contained in the JSON document ``j``.

    For every entry of ``onlineList`` an ``AVStatDO`` row is added; an
    ``AVInfoDO`` row is added as well when no row with the same ``aid`` exists
    yet. Each entry is committed on its own.

    Args:
        j: JSON text that ``selfusepy.parse_json`` can turn into an ``AV``.
        get_data_time: moment the data was fetched; stored with each statistic.

    Raises:
        Whatever the session raises while adding or committing; the failing
        entry is rolled back first and the session is always closed.
    """
    obj: AV = selfusepy.parse_json(j, AV())
    log.info("[Saving] top avs data: %s" % get_data_time.isoformat())

    session = DBSession()
    try:
        # The 1-based position in the list is passed on to AVStatDO.
        for rank, item in enumerate(obj.onlineList, start=1):
            av_info = AVInfoDO(item)
            av_stat = AVStatDO(item, rank, get_data_time)

            exist: AVInfoDO = session.query(AVInfoDO).filter(
                AVInfoDO.aid == av_info.aid).first()

            # When the av already exists only its statistic row is added.
            try:
                if not exist:
                    session.add(av_info)
                    session.add(av_stat)
                    log.info('[INSERT] aid: %s' % av_info.aid)
                else:
                    session.add(av_stat)
                    log.info('[UPDATE] av statistics, aid: %s' % av_info.aid)
                session.commit()
            except BaseException:
                session.rollback()
                raise
            else:
                log.info("[Update or Insert] success")
    finally:
        # Release the session on failure too, not only on the happy path.
        session.close()
    log.info('[DONE] save top AVs')
def get_all_objects_key() -> Set[str]:
    """Collect the keys of all objects in ``bucket``, following pagination.

    Returns:
        The set of keys gathered from successive ``s3_client.list_objects``
        calls. If a page carries no ``Contents`` list, the keys gathered so far
        are returned immediately.
    """
    marker = None
    object_keys: Set[str] = set()
    while True:
        args = dict(Bucket=bucket, Prefix='')
        if marker:
            args['Marker'] = marker
        res: dict = s3_client.list_objects(**args)
        res.pop('ResponseMetadata', None)  # drop data that is of no use here
        page: Objects = selfusepy.parse_json(json.dumps(res, default=str), Objects())

        if not isinstance(page.Contents, list):
            return object_keys
        for item in page.Contents:
            object_keys.add(item.Key)

        if not page.IsTruncated:
            break

        log.info('[s3] Got %s keys' % len(object_keys))
        # S3 only returns NextMarker when a Delimiter was sent; without it the
        # last key of the page is the marker for the next request. A falsy
        # marker would restart the listing from the beginning forever.
        marker = getattr(page, 'NextMarker', None)
        if not marker and page.Contents:
            marker = page.Contents[-1].Key
        if not marker:
            break  # no way to advance; stop instead of re-reading page one

    log.info('[s3] received %s objects' % len(object_keys))
    return object_keys
def json_test_7() -> bool:
    """Parse ``./jsontest/eg6.json`` into a ``One4`` (``List[int]`` case).

    Returns:
        True when ``selfusepy.parse_json`` hands back a ``One4`` instance.
    """
    print("List[int]测试")
    # The context manager closes the file even if reading fails.
    with open('./jsontest/eg6.json', 'r') as f:
        s = f.read()
    obj: One4 = selfusepy.parse_json(s, One4())
    return isinstance(obj, One4)
def json_test_1() -> bool:
    """Parse ``./jsontest/eg1.json`` into a ``One`` (multi-level JSON case).

    Returns:
        True when ``selfusepy.parse_json`` hands back a ``One`` instance.
    """
    print('多级复杂json转化测试: ')
    # The context manager closes the file even if reading fails.
    with open('./jsontest/eg1.json', 'r') as f:
        content = f.read()
    obj: One = selfusepy.parse_json(content, One())
    print(obj)
    return isinstance(obj, One)
def json_test_4() -> bool:
    """Parse ``./jsontest/eg4.json`` into a ``One2``.

    Covers the case where a JSON key differs from the variable name.

    Returns:
        True when ``selfusepy.parse_json`` hands back a ``One2`` instance.
    """
    print('json不同变量名测试: ')
    # The context manager closes the file even if reading fails.
    with open('./jsontest/eg4.json', 'r') as f:
        content = f.read()
    obj: One2 = selfusepy.parse_json(content, One2())
    print(obj)
    return isinstance(obj, One2)
def json_test_2() -> bool:
    """Parse ``./jsontest/eg2.json`` into a ``One1`` (JSON array case).

    Returns:
        True when ``selfusepy.parse_json`` hands back a ``One1`` instance.
    """
    print('包含json array的转化测试: ')
    # The context manager closes the file even if reading fails.
    with open('./jsontest/eg2.json', 'r') as f:
        content = f.read()
    obj: One1 = selfusepy.parse_json(content, One1())
    print(obj)
    return isinstance(obj, One1)
def __main__(mids: Set[int]):
    """Fetch the profile of every mid and insert or update its DB row.

    For each mid the account-info endpoint is requested, the response is
    parsed into a ``UserProfile`` and compared field by field with the stored
    ``UserProfileDO``; differing fields are overwritten, unknown mids are
    inserted. After every mid the function sleeps for 2 seconds.

    Args:
        mids: ids of the users to refresh.

    Raises:
        Any error raised while parsing or persisting; it is logged together
        with the raw response and re-raised. The session is always closed.
    """
    session = DBSession()
    try:
        for i in mids:
            res: HTTPResponse = selfusepy.get(
                'https://api.bilibili.com/x/space/acc/info', mid=i)
            isUpdated: bool = False
            try:
                resData: UserProfile = selfusepy.parse_json(res.data, UserProfile())
                dbData: UserProfileDO = session.query(UserProfileDO).filter(
                    UserProfileDO.mid == i).first()
                if dbData:  # already stored
                    resDO: UserProfileDO = UserProfileDO(resData)
                    # Compare the fetched values with the stored ones. The
                    # items are snapshotted because setattr below writes to
                    # the same instance dict.
                    for key, oldValue in list(vars(dbData).items()):
                        # The DO class is instrumented by sqlalchemy and so
                        # carries attributes sqlalchemy needs but we do not;
                        # skip them. ``fans`` is maintained by a separate
                        # update method and is left untouched here.
                        if key.startswith('_') or key == "fans":
                            continue
                        newValue = getattr(resDO, key)
                        if newValue != oldValue:
                            isUpdated = True
                            log.info(
                                '[UPDATE] mid: %s, key: %s, new: %s, old: %s'
                                % (i, key, newValue, oldValue))
                            setattr(dbData, key, newValue)
                    if not isUpdated:
                        log.info('[EQUAL] mid: %s' % i)
                else:
                    log.info('[INSERT] mid: %s' % i)
                    session.add(UserProfileDO(resData))

                session.commit()
            except BaseException:
                log.error('mid: %s, data: %s' % (i, res.data))
                raise
            finally:
                log.info('[SLEEP] 2s')
                time.sleep(2)
    finally:
        # Release the session when a mid fails too, not only on success.
        session.close()
def read_file(dir: str, _map: MutableMapping[str, AV] = None) -> MutableMapping[str, AV]:
    """Recursively parse every non-hidden file under ``dir`` into an ``AV``.

    Args:
        dir: directory to walk. A trailing ``/`` is optional.
        _map: mapping to fill; a new dict is created when omitted. It is also
            the accumulator shared by the recursive calls.

    Returns:
        ``_map``, keyed by file path, holding the object that
        ``selfusepy.parse_json`` produced for each file. Entries whose name
        starts with ``.`` are skipped.
    """
    if _map is None:
        _map = {}
    for item in os.listdir(dir):
        if item.startswith("."):
            continue
        # join yields the same path as ``dir + item`` when dir ends with a
        # separator and a valid one when it does not.
        current_path = os.path.join(dir, item)
        if os.path.isdir(current_path):
            read_file(current_path + "/", _map)
        else:
            # Close the handle deterministically instead of relying on GC.
            with open(current_path, "r", encoding="utf-8") as f:
                s: str = f.read()
            _map[current_path] = selfusepy.parse_json(s, AV())
    return _map
async def query_all_cid_of_av(avInfo: AVInfoDO):
    """Fetch all cids of an av and store the aid-cid pairs not yet saved.

    Requests are spaced by ``REQUEST_TIME_DELTA`` nanoseconds, measured from
    the module-level ``Last_Request_Time``. The page list of ``avInfo.aid`` is
    fetched, each entry is parsed into an ``AvDanmakuCid``, pairs missing from
    ``av_cids`` are inserted and committed, and the cid mapping is handed to
    ``filter_cid_which_isexist``.

    Args:
        avInfo: the av whose cids are queried; only ``aid`` is read.

    Raises:
        Any error from parsing, querying or persisting; the aid is logged and
        the error re-raised. The session is always closed.
    """
    global i_for_queryAllCidOfAv, Last_Request_Time
    log.info('[START] i: %s' % i_for_queryAllCidOfAv)

    # Remaining wait in seconds (the timestamps are in nanoseconds).
    delta = (Last_Request_Time + REQUEST_TIME_DELTA - time.time_ns()) / 1000_000_000
    # NOTE(review): time.sleep blocks the event loop; left as is because it
    # may be what serialises the requests -- confirm intent.
    time.sleep(max(delta, 0))
    i_for_queryAllCidOfAv += 1

    log.info('[REQUEST] av\'s cids, aid: %s' % avInfo.aid)
    Last_Request_Time = time.time_ns()
    res: HTTPResponse = await selfusepy.get_async(
        'https://www.bilibili.com/widget/getPageList?aid=' + str(avInfo.aid))

    cid_map: MutableMapping[int, AvDanmakuCid] = {}
    session = DBSession()
    try:
        j = json.loads(res.data)  # parsed once, reused below
        if isinstance(j, list):
            for item in j:
                cid_map[item['cid']] = selfusepy.parse_json(
                    json.dumps(item), AvDanmakuCid())

        log.info('[REQUEST] Done')
        log.info('[DATA] aid: %s, cid len: %s' % (avInfo.aid, len(cid_map)))

        # Find the aid-cid pairs that are already saved. The values come from
        # an HTTP response, so they are forced to integers before being put
        # into the statement text.
        # NOTE(review): with no cids this renders ``cid in ()``, which some SQL
        # dialects reject -- confirm whether an empty result should
        # short-circuit.
        sql: str = 'select cid from av_cids where aid = %s and cid in (%s)' % (
            avInfo.aid, ','.join(str(int(cid)) for cid in cid_map))

        r: ResultProxy = await execute_sql(sql)
        exist: Set[int] = {row[0] for row in r.fetchall()}

        for cid, cid_info in cid_map.items():
            if cid not in exist:
                session.add(AVCidsDO(avInfo.aid, cid_info))
        session.commit()

        await filter_cid_which_isexist(avInfo.aid, cid_map)
    except BaseException:
        log.error('aid: %s' % avInfo.aid)
        raise
    finally:
        session.close()
def getting_data() -> ({str: str}, List[int], Set[int]):
    """Download the bilibili online-list page and extract the top AVs.

    The JSON embedded in the page's ``onlineList`` script tag is cut out,
    saved under ``data-temp/<config.date>/online/<time_ns>.json`` and parsed
    into an ``AV``. The function sleeps for 2 seconds before returning.

    Returns:
        A tuple ``({file_name: file_path}, aids, mids)``: the saved file, the
        aid of every entry in list order, and the set of owner mids.

    Raises:
        IndexError: when no script tag contains ``onlineList`` and the page
            has fewer than seven script tags, or when the chosen tag holds no
            ``{...};`` section.
    """
    log.info('[START] Getting top AVs at bilibili.com')
    res = selfusepy.get(url='https://www.bilibili.com/video/online.html',
                        head=chromeUserAgent)
    html = str(res.data, encoding='utf-8').replace('\\n', '')
    soup: BeautifulSoup = BeautifulSoup(markup=html, features='lxml')
    scripts: List[Tag] = soup.find_all(name='script')

    # The wanted tag has usually been at a fixed position, but that position
    # has moved before (from the 5th to the 6th on 20/4/13 17:43:41). Try
    # index 6 first, then every tag; slicing keeps short pages from raising
    # before the search has run.
    candidates = [*scripts[6:7], *scripts]
    script: Tag = next(
        (tag for tag in candidates if "onlineList" in str(tag)), None)
    if script is None:
        # Original fallback: keep using index 6 even without a match.
        script = scripts[6]

    pattern = re.compile(r'{([\s\S]*)\};')
    value = pattern.findall(script.prettify())
    temp = '{' + str(value[0]).replace('\\n', '') + '}'  # remove \n

    file_name = '%s/online/%s.json' % (config.date, time.time_ns())
    file_path = 'data-temp/%s' % file_name
    _file.save(temp, file_path)

    # Pre-process the data.
    obj: AV = selfusepy.parse_json(temp, AV())
    aidList: List[int] = [item.aid for item in obj.onlineList]
    midSet: Set[int] = {item.owner.mid for item in obj.onlineList}

    log.info('[DONE] Getting data. [SLEEP] 2s')
    time.sleep(2)
    return {file_name: file_path}, aidList, midSet
def json_test_8() -> bool:
    """Parse ``./jsontest/eg7.json`` into an ``Obj``.

    Covers multi-level data, differing variable names and variable handlers.

    Returns:
        True when ``selfusepy.parse_json`` hands back an ``Obj`` instance.
    """
    print("多级; 不同变量名; variable handler")
    with open("./jsontest/eg7.json", "r") as handle:
        payload = handle.read()
    parsed = selfusepy.parse_json(payload, Obj())
    return isinstance(parsed, Obj)
def get_object(key: str) -> Object:
    """Fetch the object stored under ``key`` in ``bucket`` as an ``Object``.

    The response is serialised to JSON (values JSON cannot encode are
    stringified) and parsed into an ``Object``. ``Body`` is then restored
    from the raw response, because the JSON round trip would leave only its
    string form.

    Args:
        key: key of the object to fetch.

    Returns:
        The parsed ``Object`` with ``Body`` taken from the response itself.
    """
    response: dict = s3_client.get_object(Bucket=bucket, Key=key)
    serialised = json.dumps(response, default=str)
    result = selfusepy.parse_json(serialised, Object())
    result.Body = response['Body']
    return result