Exemple #1
0
def json_test_5() -> bool:
    """Parse ``./jsontest/eg5.json`` into a ``One3`` and report success.

    The parsed object is printed for visual inspection.

    Returns:
        True when ``selfusepy.parse_json`` returned a ``One3`` instance.
    """
    print('多级list测试')
    # Context manager guarantees the handle is released even if read() fails.
    with open('./jsontest/eg5.json', 'r') as f:
        content = f.read()
    obj: One3 = selfusepy.parse_json(content, One3())
    print(obj)
    return isinstance(obj, One3)
Exemple #2
0
def is_req_danmaku(aid: int, cid_len: int) -> (bool, int):
    """Decide whether another request may be made for ``aid`` and count it.

    A per-aid counter is kept in ``red`` under the key ``'<aid>-req-times'``
    as the JSON form of a ``ReqTimes`` object.

    Args:
        aid: identifier used to build the counter key.
        cid_len: number of cids of that aid; ``1`` bypasses the counter.

    Returns:
        ``(allowed, req_times)``. ``req_times`` is ``-1`` when the counter is
        bypassed, ``1`` when the counter was just created, otherwise the
        stored count (incremented when the request is allowed).
    """
    if cid_len == 1:
        return True, -1

    key = '%s-req-times' % aid
    res = red.get(key)

    if not res:  # no counter yet: create it with a first request recorded
        red.set(key, json.dumps(ReqTimes(cid_len, 1).__dict__,
                                default=int))
        expire_policy(key, cid_len)
        return True, 1

    req: ReqTimes = ReqTimes()
    try:
        req = selfusepy.parse_json(str(res, encoding='utf-8'), ReqTimes())
    except Exception:
        # Compatibility with v0.0.5, where the value was a bare integer;
        # can be removed after a while. Only ``Exception`` is caught so that
        # KeyboardInterrupt / SystemExit are not mistaken for legacy data.
        req.cid_len = cid_len
        req.req_times = int(res)

    if is_out_of_max_req_times(cid_len, req.req_times):
        return False, req.req_times

    req.req_times += 1
    red.set(key, json.dumps(req.__dict__, default=int))
    expire_policy(key, cid_len)
    return True, req.req_times
Exemple #3
0
def processing_data(j: str, get_data_time: datetime):
    """Persist the AV list contained in the JSON string ``j``.

    For every entry of ``onlineList`` an ``AVStatDO`` row (with its 1-based
    rank and ``get_data_time``) is added; an ``AVInfoDO`` row is added only
    when no row with the same ``aid`` exists yet. Each entry is committed
    separately.

    Args:
        j: JSON text parsed into an ``AV`` object.
        get_data_time: timestamp attached to every statistics row.

    Raises:
        Whatever the session raises; the failing entry is rolled back first
        and the session is always closed.
    """
    obj: AV = selfusepy.parse_json(j, AV())
    log.info("[Saving] top avs data: %s" % get_data_time.isoformat())
    session = DBSession()
    try:
        for i, item in enumerate(obj.onlineList):
            avInfoDO = AVInfoDO(item)
            avStatDO = AVStatDO(item, i + 1, get_data_time)

            exist: AVInfoDO = session.query(AVInfoDO).filter(
                AVInfoDO.aid == avInfoDO.aid).first()
            try:
                if not exist:
                    session.add(avInfoDO)
                    session.add(avStatDO)
                    log.info('[INSERT] aid: %s' % avInfoDO.aid)
                else:
                    # Already known: only add the statistics of this av.
                    session.add(avStatDO)
                    log.info('[UPDATE] av statistics, aid: %s' % avInfoDO.aid)

                session.commit()
            except BaseException:
                session.rollback()
                raise
            else:
                log.info("[Update or Insert] success")
    finally:
        # Release the session on the error path too, not only on success.
        session.close()
    log.info('[DONE] save top AVs')
0
def get_all_objects_key() -> Set[str]:
    """Collect the keys of every object in ``bucket``, following pagination.

    Returns:
        The set of ``Key`` values seen across all pages. If a page has no
        ``Contents`` list, the keys gathered so far are returned at once.
    """
    marker = None
    object_keys: Set[str] = set()
    while True:
        args = dict(Bucket=bucket, Prefix='')
        if marker:
            args['Marker'] = marker
        res: dict = s3_client.list_objects(**args)
        res.pop('ResponseMetadata', None)  # remove useless key's data
        obj: Objects = selfusepy.parse_json(json.dumps(res, default=str),
                                            Objects())
        if not isinstance(obj.Contents, list):
            return object_keys
        object_keys.update(item.Key for item in obj.Contents)

        if not obj.IsTruncated:
            break

        log.info('[s3] Got %s keys' % len(object_keys))
        # S3 only returns NextMarker when a Delimiter was sent; without it the
        # last key of the page is the marker for the next request. Relying on
        # NextMarker alone would re-fetch the first page forever.
        next_marker = getattr(obj, 'NextMarker', None)
        if not next_marker and obj.Contents:
            next_marker = obj.Contents[-1].Key
        if not next_marker:
            log.error('[s3] truncated listing without a usable marker, stop')
            break
        marker = next_marker

    log.info('[s3] received %s objects' % len(object_keys))
    return object_keys
Exemple #5
0
def json_test_7() -> bool:
    """Parse ``./jsontest/eg6.json`` into a ``One4`` and report success.

    Returns:
        True when ``selfusepy.parse_json`` returned a ``One4`` instance.
    """
    print("List[int]测试")
    # Context manager closes the file even when read() raises.
    with open('./jsontest/eg6.json', 'r') as f:
        s = f.read()
    obj: One4 = selfusepy.parse_json(s, One4())

    return isinstance(obj, One4)
Exemple #6
0
def json_test_1() -> bool:
    """Parse ``./jsontest/eg1.json`` (e.g. 1) into a ``One`` and report success.

    The parsed object is printed for visual inspection.

    Returns:
        True when ``selfusepy.parse_json`` returned a ``One`` instance.
    """
    print('多级复杂json转化测试: ')
    # Context manager guarantees the handle is released even if read() fails.
    with open('./jsontest/eg1.json', 'r') as f:
        content = f.read()
    obj: One = selfusepy.parse_json(content, One())
    print(obj)
    return isinstance(obj, One)
Exemple #7
0
def json_test_4() -> bool:
    """Parse ``./jsontest/eg4.json`` into a ``One2`` and report success.

    Covers the case where a JSON key differs from the variable name.
    The parsed object is printed for visual inspection.

    Returns:
        True when ``selfusepy.parse_json`` returned a ``One2`` instance.
    """
    print('json不同变量名测试: ')
    # Context manager guarantees the handle is released even if read() fails.
    with open('./jsontest/eg4.json', 'r') as f:
        content = f.read()
    obj: One2 = selfusepy.parse_json(content, One2())
    print(obj)
    return isinstance(obj, One2)
Exemple #8
0
def json_test_2() -> bool:
    """Parse ``./jsontest/eg2.json`` (e.g. 2, with a JSON array) into a ``One1``.

    The parsed object is printed for visual inspection.

    Returns:
        True when ``selfusepy.parse_json`` returned a ``One1`` instance.
    """
    print('包含json array的转化测试: ')
    # Context manager guarantees the handle is released even if read() fails.
    with open('./jsontest/eg2.json', 'r') as f:
        content = f.read()
    obj: One1 = selfusepy.parse_json(content, One1())
    print(obj)
    return isinstance(obj, One1)
Exemple #9
0
def __main__(mids: Set[int]):
    """Fetch the profile of every mid and insert or update it in the database.

    For each mid the account-info endpoint is requested; the result is either
    inserted as a new ``UserProfileDO`` or compared field by field with the
    stored row, copying over every value that changed. The function sleeps
    2 seconds after each mid, whether it succeeded or not.

    Args:
        mids: the user ids to synchronise.

    Raises:
        Any error raised while parsing or persisting a profile; it is logged
        together with the raw response and the session is closed.
    """
    session = DBSession()
    try:
        for i in mids:
            mid = {'mid': i}
            res: HTTPResponse = selfusepy.get(
                'https://api.bilibili.com/x/space/acc/info', **mid)
            isUpdated: bool = False

            try:
                resData: UserProfile = selfusepy.parse_json(
                    res.data, UserProfile())
                dbData: UserProfileDO = session.query(UserProfileDO).filter(
                    UserProfileDO.mid == i).first()
                if dbData:  # already stored
                    resDO: UserProfileDO = UserProfileDO(resData)
                    # Compare the fetched values with the stored ones.
                    # Iterate a snapshot so setattr below cannot disturb it.
                    for name, oldValue in list(vars(dbData).items()):
                        # Skip the attributes sqlalchemy adds to the DO class
                        # for its own use; "fans" is left untouched here
                        # because a separate method updates it.
                        if name.startswith('_') or name == "fans":
                            continue
                        newValue = getattr(resDO, name)
                        if newValue != oldValue:
                            isUpdated = True
                            log.info(
                                '[UPDATE] mid: %s, key: %s, new: %s, old: %s' %
                                (i, name, newValue, oldValue))
                            setattr(dbData, name, newValue)
                    if not isUpdated:
                        log.info('[EQUAL] mid: %s' % i)
                else:
                    log.info('[INSERT] mid: %s' % i)
                    session.add(UserProfileDO(resData))

                session.commit()
            except BaseException:
                log.error('mid: %s, data: %s' % (i, res.data))
                raise
            finally:
                log.info('[SLEEP] 2s')
                time.sleep(2)
    finally:
        # Close the session on the error path as well as on success.
        session.close()
Exemple #10
0
def read_file(dir: str, _map: MutableMapping[str, AV] = None) -> MutableMapping[str, AV]:
    """Recursively parse every non-hidden file under ``dir`` into an ``AV``.

    Args:
        dir: directory to walk; a trailing ``/`` is optional.
        _map: mapping to fill in; a new dict is created when omitted. It is
            also what the recursive calls share.

    Returns:
        The mapping from file path to the ``AV`` parsed from that file.
        Entries whose name starts with ``.`` are skipped.
    """
    if _map is None:
        _map = {}

    entries = os.listdir(dir)
    # Plain concatenation needs the separator; add it when the caller left it
    # off so "data" and "data/" behave the same.
    base = dir if dir.endswith("/") else dir + "/"
    for item in entries:
        if item.startswith("."):
            continue
        current_path = base + item
        if os.path.isdir(current_path):
            read_file(current_path + "/", _map)
        else:
            # Close each file right after reading instead of leaking it.
            with open(current_path, "r", encoding="utf-8") as f:
                s: str = f.read()
            _map[current_path] = selfusepy.parse_json(s, AV())

    return _map
Exemple #11
0
async def query_all_cid_of_av(avInfo: AVInfoDO):
    """Request the cids of one av and store the aid-cid pairs not yet saved.

    The request is delayed so that at least ``REQUEST_TIME_DELTA`` nanoseconds
    separate it from the previous one (tracked in the module-level
    ``Last_Request_Time``). Afterwards ``filter_cid_which_isexist`` is awaited
    with the aid and the cid mapping.

    Args:
        avInfo: the av whose ``aid`` is queried.

    Raises:
        Any error raised while parsing, querying or persisting; the aid is
        logged first and the session is always closed.
    """
    global i_for_queryAllCidOfAv, Last_Request_Time
    log.info('[START] i: %s' % i_for_queryAllCidOfAv)

    delta = (Last_Request_Time + REQUEST_TIME_DELTA -
             time.time_ns()) / 1000_000_000
    # NOTE(review): this is a blocking sleep inside a coroutine. It is kept
    # on purpose here: nothing else can run between reading and updating
    # Last_Request_Time, which is what keeps the requests spaced out.
    time.sleep(delta if delta > 0 else 0)
    i_for_queryAllCidOfAv += 1

    log.info('[REQUEST] av\'s cids, aid: %s' % avInfo.aid)

    Last_Request_Time = time.time_ns()
    res: HTTPResponse = await selfusepy.get_async(
        'https://www.bilibili.com/widget/getPageList?aid=' + str(avInfo.aid))
    cid_map: MutableMapping[int, AvDanmakuCid] = {}
    session = DBSession()
    try:
        j = json.loads(res.data)  # parse once and reuse the result
        if isinstance(j, list):
            for item in j:
                cid_map[item['cid']] = selfusepy.parse_json(
                    json.dumps(item), AvDanmakuCid())
        log.info('[REQUEST] Done')
        log.info('[DATA] aid: %s, cid len: %s' % (avInfo.aid, len(cid_map)))

        # Find the aid-cid pairs that are already saved so they are skipped.
        exist: Set[int] = set()
        if cid_map:  # an empty list would produce the invalid "cid in ()"
            # The cids come from the remote response; int() makes sure only
            # numbers can reach the SQL text.
            sql: str = 'select cid from av_cids where aid = %s and cid in (%s)' % (
                avInfo.aid, ','.join(str(int(cid)) for cid in cid_map))
            r: ResultProxy = await execute_sql(sql)
            exist = {row[0] for row in r.fetchall()}

        for cid, cid_info in cid_map.items():
            if cid not in exist:
                session.add(AVCidsDO(avInfo.aid, cid_info))
        session.commit()
        await filter_cid_which_isexist(avInfo.aid, cid_map)
    except BaseException:
        log.error('aid: %s' % avInfo.aid)
        raise
    finally:
        session.close()
Exemple #12
0
def getting_data() -> ({str: str}, List[int], Set[int]):
    """Download the online-list page, save its embedded JSON and summarise it.

    The JSON is taken from the ``<script>`` tag containing ``onlineList``,
    written to ``data-temp/<config.date>/online/<time_ns>.json`` and parsed
    into an ``AV`` object. The function sleeps 2 seconds before returning.

    Returns:
        A tuple ``({file_name: file_path}, aid_list, mid_set)`` where
        ``aid_list`` holds the aid of every entry in order and ``mid_set``
        the owner mids.

    Raises:
        IndexError: no script tag with an ``onlineList`` payload was found.
    """
    log.info('[START] Getting top AVs at bilibili.com')
    res = selfusepy.get(url='https://www.bilibili.com/video/online.html',
                        head=chromeUserAgent)
    soup: BeautifulSoup = BeautifulSoup(markup=str(res.data,
                                                   encoding='utf-8').replace(
                                                       '\\n', ''),
                                        features='lxml')

    # Extract the JSON embedded in the page.
    scripts: List[Tag] = soup.find_all(name='script')

    # The wanted tag is usually at a fixed position, but that position has
    # changed before (e.g. on 20/4/13 17:43:41 it moved from 5 to 6), so
    # index 6 is only tried first and a search is the fallback. The length
    # check keeps short pages from failing before the search can run.
    if len(scripts) > 6 and "onlineList" in str(scripts[6]):
        script = scripts[6]
    else:
        script = next((tag for tag in scripts if "onlineList" in str(tag)),
                      None)
    if script is None:
        raise IndexError('no <script> tag containing "onlineList" found')

    pattern = re.compile(r'{([\s\S]*)\};')
    value = pattern.findall(script.prettify())
    if not value:
        raise IndexError('no JSON payload found in the "onlineList" script')
    temp = '{' + str(value[0]).replace('\\n', '') + '}'  # remove \n
    file_name = '%s/online/%s.json' % (config.date, time.time_ns())
    file_path = 'data-temp/%s' % file_name
    _file.save(temp, file_path)

    # pre processing data
    aidList: List[int] = list()
    midSet: Set[int] = set()
    obj: AV = selfusepy.parse_json(temp, AV())
    for item in obj.onlineList:
        aidList.append(item.aid)
        midSet.add(item.owner.mid)

    log.info('[DONE] Getting data. [SLEEP] 2s')
    time.sleep(2)
    return {file_name: file_path}, aidList, midSet
Exemple #13
0
def json_test_8() -> bool:
    """Parse ``./jsontest/eg7.json`` into an ``Obj`` and report success.

    Returns:
        True when ``selfusepy.parse_json`` returned an ``Obj`` instance.
    """
    print("多级; 不同变量名; variable handler")
    with open("./jsontest/eg7.json", "r") as source:
        raw_text = source.read()
    parsed: Obj = selfusepy.parse_json(raw_text, Obj())
    succeeded = isinstance(parsed, Obj)
    return succeeded
Exemple #14
0
def get_object(key: str) -> Object:
    """Fetch the object stored under ``key`` and wrap it in an ``Object``.

    Args:
        key: key of the object inside ``bucket``.

    Returns:
        The ``Object`` parsed from the response, with ``Body`` set to the
        original ``Body`` value of the response.
    """
    response: dict = s3_client.get_object(Bucket=bucket, Key=key)
    # default=str turns every value json cannot encode into its string form.
    serialized = json.dumps(response, default=str)
    result = selfusepy.parse_json(serialized, Object())
    # Put the untouched Body back in place of its stringified copy.
    result.Body = response['Body']
    return result