def reset(self, start_at):
    """Reload the stored history and cut every series down to ``start_at`` items.

    Nothing happens unless ``start_at`` is positive, ``self.json_path`` is
    set, and that path exists. When all three hold, ``self.H`` is replaced
    with the content read from the JSON file and each of its values is
    sliced to its first ``start_at`` elements.
    """
    if not (start_at > 0):
        return
    path = self.json_path
    if path is None or not path.exists():
        return
    self.H = piop.read_json(path)
    for key in self.H:
        # Keep only the entries recorded before the restart point.
        self.H[key] = self.H[key][:start_at]
Example #2
0
def batch_process(batch_file: str):
    """Load a batch JSON file, push a node per item, then update relations.

    The work is done in two passes over the same items: every node is
    pushed to ``graph`` before any ``RelationUpdate`` is run.
    """
    records = piop.read_json(batch_file)

    # Pass 1: create and push one node per record.
    for record in records:
        graph.push(create_node(record))

    # Pass 2: run the relation update for each record.
    for record in records:
        RelationUpdate(record).update_all()
Example #3
0
def check_repeat_url():
    """Print URL-list files whose URLs repeat or whose page count is off.

    For each non-hidden file in ``urlist_path`` this compares the number of
    listed URLs, the number of distinct URLs, and the number of non-hidden
    files in the matching directory under ``html_path``; any mismatch is
    printed.
    """
    # Duplicate URL check
    for name in os.listdir(urlist_path):
        if name[0] == '.':
            continue
        record = piop.read_json(os.path.join(urlist_path, name))
        page_dir = os.path.join(html_path, record['position'])
        urls = record['urlist']
        total = len(urls)
        distinct = len(set(urls))
        if total != distinct:
            print(name, distinct, total)
        visible = [entry for entry in os.listdir(page_dir) if entry[0] != '.']
        saved = len(visible)
        if saved != total:
            print(name, saved, total)
        if saved != distinct:
            print("wrong")
Example #4
0
def download():
    """Fetch every listed URL with Chrome and save the page source to disk.

    Each non-hidden file in ``urlist_path`` is read as JSON; its
    ``'position'`` value names the target directory under ``html_path`` and
    its ``'urlist'`` value holds the URLs. A URL whose target file already
    exists is skipped, so the function can be re-run to resume.

    The browser is restarted after every 10 pages actually fetched, and it
    is always shut down when the function exits, including on error.
    """
    restart_every = 10
    fetched = 0
    driver = webdriver.Chrome()
    try:
        for file in os.listdir(urlist_path):
            if file[0] == '.':
                continue
            data = piop.read_json(os.path.join(urlist_path, file))
            fpath = os.path.join(html_path, data['position'])
            # NOTE(review): presumably creates the directory if missing - confirm.
            piop.check_dir(fpath)
            for url in data['urlist']:
                fname = os.path.split(url)[-1]
                target = os.path.join(fpath, fname)
                if os.path.exists(target):
                    continue
                driver.get(url)
                write_html(target, driver.page_source)
                fetched += 1
                # Random pause between requests.
                time.sleep(random.randint(1, 3))
                # Restart the browser every `restart_every` fetched pages.
                # Counting fetched pages (not the list index) avoids a
                # restart right after the very first page.
                if fetched % restart_every == 0:
                    # quit() ends the whole session; close() only closes
                    # the current window.
                    driver.quit()
                    driver = webdriver.Chrome()
    finally:
        driver.quit()
Example #5
0
def check():
    """Verify that the saved pages match the URL lists one-to-one.

    Collects the de-duplicated URLs of every non-hidden file in
    ``urlist_path`` and the names of every non-hidden file in each
    non-hidden directory of ``html_path``, prints the total and distinct
    counts of both, and asserts that the counts agree.

    Raises:
        AssertionError: if the number of saved files (total or distinct)
            differs from the number of listed URLs.
    """
    # Page check: gather the per-file de-duplicated URLs.
    allist = []
    for file in os.listdir(urlist_path):
        if file.startswith('.'):
            continue
        data = piop.read_json(os.path.join(urlist_path, file))
        allist.extend(set(data['urlist']))
    print(len(allist), len(set(allist)))

    allfiles = []
    for pname in os.listdir(html_path):
        if pname.startswith('.'):
            continue
        for base_file in os.listdir(os.path.join(html_path, pname)):
            # Skip hidden entries, as check_repeat_url does when counting
            # pages, so stray dot-files do not break the comparison.
            if base_file.startswith('.'):
                continue
            allfiles.append(base_file)
    print(len(allfiles), len(set(allfiles)))

    assert len(allfiles) == len(allist), "saved page count != listed URL count"
    assert len(set(allfiles)) == len(set(allist)), \
        "distinct page count != distinct URL count"
Example #6
0
def get_cate_res(cate_extract_file: str):
    """Collect segmented duty/require results and dtimes from one extract file.

    Every item contributes its ``'dtime'``. Items whose ``'require'`` is
    shorter than 10 are skipped for segmentation. If ``segpos`` fails on
    ``'require'`` the error is printed and the item's ``'duty'`` is not
    processed; a failure on ``'duty'`` is printed and ignored.

    Returns:
        A tuple ``(cate, duty, require, dtimes)`` where ``cate`` is the
        ``'category'`` of the last item read, or ``""`` when it cannot be
        obtained (for example when there are no items).
    """
    records = piop.read_json(cate_extract_file)
    dtimes = []
    require = []
    duty = []
    last_record = None
    for record in records:
        last_record = record
        dtimes.append(record['dtime'])
        if len(record['require']) < 10:
            continue
        try:
            require.extend(segpos(record['require']))
        except Exception as err:
            print("GET ITEM ERROR.", err)
            continue
        try:
            duty.extend(segpos(record['duty']))
        except Exception as err:
            print("GET ITEM ERROR.", err)

    # The category is taken from the last record; any failure yields "".
    try:
        cate = last_record['category']
    except Exception:
        cate = ""
    return cate, duty, require, dtimes
Example #7
0
    return res


# Script entry point: for every category/post pair in `cate_gw`, gather the
# matching segpos result files and derive per-post duty / require / demand
# entries.
if __name__ == '__main__':
    segpos_files = sorted(os.listdir(segpos_path))
    # NOTE(review): `res` is not written to in the lines visible here.
    res = pmag.MagicDict()
    for cate, _post in cate_gw.items():
        # Category-level accumulators, extended with each post's raw lists below.
        duty_list, require_list, dtimes_list = [], [], []
        tmp = pmag.MagicDict()
        # NOTE(review): `job` is unused in the lines visible here.
        for post, job in _post.items():
            tmp_duty_list, tmp_require_list, tmp_dtimes_list = [], [], []
            tag = cate + "_" + post
            for file in segpos_files:
                # Substring match: any file whose name contains "<cate>_<post>".
                if tag in file:
                    fname = os.path.join(segpos_path, file)
                    cate_data = piop.read_json(fname)
                    tmp_duty_list.extend(cate_data['duty'])
                    tmp_require_list.extend(cate_data['require'])
                    tmp_dtimes_list.extend(cate_data['dtimes'])

            tmp_duty = get_common(tmp_duty_list, IGNORE, NEEDPOS)
            # The require result is filtered against the duty result.
            tmp_require = filter_require(
                tmp_duty, get_common(tmp_require_list, IGNORE, NEEDPOS))

            # NOTE(review): presumably MagicDict creates tmp[post] on first
            # access - confirm against pnlp.
            tmp[post]['duty'] = get_need_item(tmp_duty)
            tmp[post]['require'] = get_need_item(tmp_require)
            tmp[post]['demand'] = get_dtime_item(tmp_dtimes_list)

            duty_list.extend(tmp_duty_list)
            require_list.extend(tmp_require_list)
            dtimes_list.extend(tmp_dtimes_list)
Example #8
0
def test_read_json():
    """read_json should load the sample JSON file as a plain dict."""
    json_file = os.path.join(DATA_PATH, 'json.json')
    expected = {"json1": "this is line 1", "json2": "这是第二行。"}
    loaded = read_json(json_file)
    assert type(loaded) is dict
    assert loaded == expected
Example #9
0
import json
import math
import os
from pnlp import piop, pmag

# Directory containing this module; the data files are located relative to it.
ROOT_PATH = os.path.dirname(os.path.abspath(__file__))
# Paths of the model file and the ignore list under <ROOT_PATH>/data.
MODEL_PATH = os.path.join(ROOT_PATH, "data", "model.txt")
IGNORE_PATH = os.path.join(ROOT_PATH, "data", "ignore.txt")

# Both files are read once, at import time.
# MODEL is used below as a mapping: category -> {'posts': {post: {'demand': ...}}}.
MODEL = piop.read_json(MODEL_PATH)
# NOTE(review): presumably one ignore entry per line - confirm against pnlp.
IGNORE = piop.read_lines(IGNORE_PATH)


def get_demand_normfactor():
    """
    Get demand normalization factor
    Parameters
    -----------
    
    Returns
    --------
        demand normalization factor, int type
    """
    csfs, ilfs, factors = [], [], []
    for cate, _others in MODEL.items():
        for post, others in _others['posts'].items():
            item = MODEL[cate]['posts'][post]['demand']
            csf = item.get('continuous_freq', 0)
            ilf = item.get('interval_freq', 0)
            phf = item.get('publish_freq', 0)
            # factor = (csf + ilf) / 2 * phf