def main():
    """Scrape CBA player listings (pages 1-26) from hupu.com into JSON.

    Fetches each listing page, extracts one record per highlighted table
    row, prints progress, sleeps 20s between pages to be polite to the
    server, and finally writes the collected records to the module-level
    ``json_name`` path via ``demjson.encode_to_file``.  No return value.
    """
    players = []  # renamed from `json`, which shadowed the stdlib module name
    for page_no in tqdm(range(1, 27)):
        print('------page %d start--------' % page_no)
        html = BeautifulSoup(
            requests.get('https://cba.hupu.com/players/players.php?sort=%d'
                         % page_no).content,
            'html.parser')
        # The site emits the row-highlight style both with and without a
        # space after the colon; iterate both variants instead of keeping
        # two verbatim copies of the parsing loop (the original's defect).
        for style in ('color:#990000', 'color: #990000'):
            for tr in html.find_all('tr', attrs={'style': style}):
                td = tr.find_all('td')
                msg = {
                    'Num': td[0].get_text(),
                    'Name': td[1].find('a').get_text(),
                    'Team': td[2].get_text(),
                    'Height': td[3].get_text().split('CM', 1)[0],
                    'Weight': td[4].get_text().split('KG', 1)[0],
                    'Pos': td[5].get_text(),
                    # NOTE(review): key kept as 'Brith' (sic) -- downstream
                    # consumers of the output file may rely on the misspelling.
                    'Brith': td[6].get_text(),
                    'Page': td[1].find('a')['href'],
                }
                players.append(msg)
                print(msg)
        print('------page %d completed, sleep 20s------' % page_no)
        time.sleep(20)
    print('encoding...')
    demjson.encode_to_file(json_name, players)
def __update_local_db(self, version, code):
    """Record *version* -> *code* in memory and persist the whole mapping.

    Serializes ``self.db_dict`` as a list of ``{'version', 'code'}``
    records into ``database.json``, overwriting any previous file.
    """
    self.db_dict[version] = code
    records = [{'version': ver, 'code': val}
               for ver, val in self.db_dict.items()]
    demjson.encode_to_file('database.json', records, overwrite=True)
def main():
    """CLI entry point: extract annotated time series from a spreadsheet.

    Takes three positional arguments (annotation JSON, Excel file, output
    path) and writes the extracted series to the output file with demjson.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "annotation",
        help='Annotation of time series in custom JSON format')
    parser.add_argument("spreadsheet", help='Excel spreadsheet file')
    parser.add_argument("outfile", help='file to write results')
    opts = parser.parse_args()

    extractor = ExtractSpreadsheet(opts.spreadsheet, opts.annotation)
    demjson.encode_to_file(opts.outfile, extractor.process(), overwrite=True)
def remove_json_by_channel_id(self, channel_id, to_file):
    """Remove the first record whose "channel_id" matches *channel_id*
    from the JSON list stored in *to_file* and rewrite the file in place.
    """
    # `with open(...)` replaces the Python-2-only `file()` builtin and
    # guarantees the handle is closed (the original leaked it).
    with open(to_file) as fp:
        to_json_list = json.load(fp)
    for item in to_json_list:
        if item["channel_id"] == channel_id:
            to_json_list.remove(item)
            break
    demjson.encode_to_file(to_file, to_json_list, overwrite=True)
def remove_json(self, base_json_file, to_file):
    """Remove the first item equal to the dict loaded from *base_json_file*
    from the JSON list in *to_file*, rewriting *to_file*.

    Does nothing unless *base_json_file* holds a JSON object (dict).
    """
    # `with open(...)` replaces the Python-2-only `file()` builtin and
    # guarantees the handles are closed (the original leaked both).
    with open(base_json_file) as fp:
        base_json = json.load(fp)
    if isinstance(base_json, dict):
        with open(to_file) as fp:
            to_json_list = json.load(fp)
        for item in to_json_list:
            if item == base_json:
                to_json_list.remove(item)
                break
        demjson.encode_to_file(to_file, to_json_list, overwrite=True)
def save(stock_id, a_list):
    """Persist *a_list* to ./data/<stock_id>.json (UTF-8, pretty-printed).

    Returns whatever ``demjson.encode_to_file`` returns.
    """
    target = os.path.join('./data', '%s.json' % stock_id)
    return demjson.encode_to_file(
        target, a_list, encoding='utf-8', overwrite=True, compactly=False)
def insert_json(self, base_json_file, to_file):
    """Insert (or replace, keyed by "channel_id") the record loaded from
    *base_json_file* into the JSON list in *to_file*, then rewrite it.

    Does nothing unless *base_json_file* holds a JSON object (dict).
    """
    # `with open(...)` replaces the Python-2-only `file()` builtin and
    # guarantees the handles are closed (the original leaked both).
    with open(base_json_file) as fp:
        base_json = json.load(fp)
    if isinstance(base_json, dict):
        channel_id = base_json["channel_id"]
        with open(to_file) as fp:
            to_json_list = json.load(fp)
        # Drop any existing record for this channel before appending.
        for item in to_json_list:
            if item["channel_id"] == channel_id:
                to_json_list.remove(item)
                break
        to_json_list.append(base_json)
        demjson.encode_to_file(to_file, to_json_list, overwrite=True)
def backup(cursor, n, i):
    """Dump the i-th slice of *n* rows from the configured table to bak<i>.txt.

    The last worker (i == proNum - 1) takes whatever rows remain.  Relies
    on module globals ``proNum``, ``sqlCount``, ``selectSql`` and ``conf``;
    `json` here is the demjson module (``encode_to_file``).
    """
    # Last process takes the remainder; all others take exactly n rows.
    count = sqlCount - n * i if i == proNum - 1 else n
    sql = selectSql % (conf["table"], count, i * n)
    logging.log(logging.DEBUG, "SQL: %s", sql)
    cursor.execute(sql)
    rows = cursor.fetchall()
    logging.log(logging.INFO,
                "Start Child Process %d, have %d data, save data to file[bak%d.txt]...",
                i, len(rows), i)
    json.encode_to_file("bak%d.txt" % i, rows, overwrite=True)
    logging.log(logging.INFO, "Child Process %d Exit...", i)
# NOTE(review): this block was mangled by a credentials-redaction pass --
# the '******' run replaced the original code between reading the password
# and the child-comment loop, so it is NOT valid Python as it stands.
# What survives suggests: prompt for uyan credentials, re-attach each child
# comment to its parent (matched on parent_uname + url, removing the
# temporary "parent_uname" key), then write comments.json with demjson
# (utf-8, overwrite, indented).  TODO: recover the missing code before use.
def main(): email = raw_input("Please enter your uyan login email:") password = raw_input("Please enter your uyan password:"******"parent_uname"] del c_comment["parent_uname"] for p_comment in comments: if p_comment['uname'] == parent_uname and c_comment["url"] == p_comment["url"]: if not 'child' in p_comment.keys(): p_comment['child'] = [c_comment] else: p_comment['child'].append(c_comment) break print("Your comments will saved to comments.json.") demjson.encode_to_file("comments.json", comments, 'utf-8', True, indent_amount=2, compactly=False) print("Finish !!!! Now you can open comments.json to see the comments")
# NOTE(review): Python-2-only code (u'' prints, `except Exception, e`,
# urllib2, raw_input), additionally mangled by a credentials-redaction pass:
# the '******' runs in `postData` replaced the real username/password
# expressions, so this is NOT valid Python as it stands.  Kept byte-identical
# rather than guessed at.  Surviving intent: fetch the CAPTCHA image to the
# desktop, try OCR via pytesser (falling back to manual entry), POST the
# login form, scrape sid/xnd/xq from the response, cache credentials in
# student.txt on first success, and retry on HTTP errors.
def getCode(self): codeUrl = 'http://uems.sysu.edu.cn/elect/login/code' loginUrl = "http://uems.sysu.edu.cn/elect/login" print u'加载验证码...' while True: try: open(get_desktop() + "\code.jpeg", "wb").write(urllib2.urlopen(codeUrl).read()) try: im = Image.open(get_desktop() + '\code.jpeg').convert('L') im = im.point(lambda x: 255 if x > 128 else x) im = im.point(lambda x: 0 if x < 255 else 255) box = (2, 2, im.size[0] - 2, im.size[1] - 2) im = im.crop(box) j_code = pytesser.image_to_string(im).replace( ' ', '').replace(']', 'J').replace('0', 'O').strip().upper() print u'自动识别验证码...' except Exception, e: j_code = raw_input( u'请输入桌面的code.jpeg所对应的验证码(不分大小写): '.encode( self.coding)).upper().strip() postData = 'username='******'&password='******'&j_code=' + j_code + '<=&_eventId=submit&gateway=true' res = urllib2.urlopen(loginUrl, postData) html = BeautifulSoup(res.read(), "html.parser") self.sid = html.select('input[id=sid]')[0]['value'] if self.sid != '': self.xnd = html.select('input[id=xnd]')[0]['value'] self.xq = html.select('input[id=xq]')[0]['value'] print u'登录成功' if os.path.exists(get_desktop() + '\student.txt') == False: demjson.encode_to_file(get_desktop() + '\student.txt', {self.stuNum: self.password}) print u'账号密码已存于桌面文件student.txt,下次可直接登录!!!\n' break except urllib2.HTTPError, e: print u'登录失败,重新加载验证码...' time.sleep(1.0)
def backup(cursor, n, i):
    """Export slice *i* of the table to bak<i>.txt via json.encode_to_file.

    Slice size is *n*, except the final slice (i == proNum - 1), which
    takes the remaining ``sqlCount - n * i`` rows.  Uses module globals
    ``proNum``, ``sqlCount``, ``selectSql`` and ``conf``; `json` here is
    the demjson module.
    """
    if i == proNum - 1:
        limit = sqlCount - n * i  # remainder goes to the last process
    else:
        limit = n
    query = selectSql % (conf["table"], limit, i * n)
    logging.log(logging.DEBUG, "SQL: %s", query)
    cursor.execute(query)
    rows = cursor.fetchall()
    logging.log(
        logging.INFO,
        "Start Child Process %d, have %d data, save data to file[bak%d.txt]...",
        i, len(rows), i)
    json.encode_to_file("bak" + str(i) + ".txt", rows, overwrite=True)
    logging.log(logging.INFO, "Child Process %d Exit...", i)
def generate_post_data(source_data):
    """Build the daily-report POST body from *source_data*.

    Loads ./json/model.json as a template, fills it with the student's
    record taken from *source_data*, stamps SBSJ with today's date and
    CLSJ with yesterday 09:00, writes a debug copy to ./json/example.json
    (`json` here is the demjson module), and returns
    ``(model_data, whether_signed)`` where *whether_signed* is True when
    today's report already exists on the server.
    """
    # Data-store key used by the sign-in form (fixed in this version).
    ds_key = "535b1ef6-bf51-4d4c-9ae4-5a90cdc4"
    record_key = ds_key + "_record"
    # Record fields copied verbatim from the fetched record (pinyin
    # abbreviations: ZH=student id, XM=name, XSXB=sex, NL=age, SZDW=school,
    # ZYMC=major, XSLX=type, ZXSJ=phone, FDYXMX=advisor, JJLXR*=emergency
    # contact, LXZT=current city, DQSFJJIA=at home?, sheng/shi/quxian=
    # province/city/district, DQJZDZ=address).
    copy_fields = (
        "ZH", "XM", "XSXB", "NL", "SZDW", "ZYMC", "XSLX", "ZXSJ",
        "FDYXMX", "JJLXRXM", "JJLXRDH", "JJLXRYBRGX", "LXZT", "DQSFJJIA",
        "sheng_TEXT", "sheng", "shi_TEXT", "shi", "quxian_TEXT", "quxian",
        "DQJZDZ",
    )

    whether_signed = False
    model_data = json.decode_file("./json/model.json")
    current_date = get_current_date(TIME_ZONE)
    yesterday_date_time = get_yesterday_date(TIME_ZONE) + ' 09:00:00'

    stores = source_data["body"]["dataStores"]
    # A "_record" store exists once the form was saved ("暂存") or
    # submitted; its presence tells us whether today is already signed.
    if record_key in stores:
        source_record = stores[record_key]["rowSet"]["primary"][0]
        model_data["body"]["dataStores"][record_key] = stores[record_key]
        print("today is " + source_record['SBSJ_STR'][0:10])
        if source_record['SBSJ_STR'][0:10] == current_date:
            whether_signed = True
    else:
        source_record = stores[ds_key]["rowSet"]["primary"][0]
        del model_data["body"]["dataStores"][record_key]
        del model_data["body"]["dataStores"][ds_key]["rowSet"]["primary"][0]["_o"]

    # Collapse the original 23 copy-paste assignments into one loop.
    target = model_data["body"]["dataStores"][ds_key]["rowSet"]["primary"][0]
    for field in copy_fields:
        target[field] = source_record[field]
    target["SBSJ"] = current_date          # report date = today
    target["CLSJ"] = yesterday_date_time   # fill-in time = yesterday 09:00

    model_data["body"]["dataStores"]["variable"] = stores["variable"]
    model_data["body"]["parameters"] = source_data["body"]["parameters"]
    json.encode_to_file("./json/example.json", model_data, overwrite=True)
    return model_data, whether_signed
def generate_post_data(source_data):
    """Build the daily-report POST body from *source_data*.

    The server renames its data-store code aperiodically, so it is
    recovered heuristically as the second-shortest ``dataStores`` key.
    The template ./json/model.json (with its "{Temp}" placeholder replaced
    by a random temperature) is re-keyed to that code, filled with the
    student's record, stamped with today's date (SBSJ) and yesterday
    09:00 (CLSJ), dumped to ./json/new_model.json and ./json/example.json
    for debugging (`json` here is the demjson module), and returned as
    ``(model_data, whether_signed)``.
    """
    stores = source_data["body"]["dataStores"]
    # Heuristic: the opaque form code is the second-shortest store key.
    # This may change aperiodically; we don't know how the server derives
    # it.  sorted() is stable, so ties keep the server's order.
    unknown_code = sorted(stores, key=len)[1]
    rowSetName = stores[unknown_code]["rowSetName"]
    whether_signed = False

    with open("./json/model.json", encoding='utf-8') as json_file:
        model_txt = json_file.read()
    # {Temp} placeholder support: inject a randomized body temperature.
    model_txt = model_txt.replace(r"{Temp}", "%.1f" % get_random_temp())
    model_data = json.decode(model_txt)

    current_date = get_current_date(TIME_ZONE)
    yesterday_date_time = get_yesterday_date(TIME_ZONE) + ' 09:00:00'

    unknown_code_use = unknown_code
    unknown_code_record = unknown_code + "_record"

    # Re-key the template's fixed stores to the freshly-sniffed code.
    template_key = '535b1ef6-bf51-4d4c-9ae4-5a90cdc4'
    new_model_data = {
        "header": model_data["header"],
        "body": {
            "parameters": model_data["body"]["parameters"],
            "dataStores": {
                "variable": model_data["body"]["dataStores"]["variable"],
                unknown_code_use:
                    model_data["body"]["dataStores"][template_key],
                unknown_code_record:
                    model_data["body"]["dataStores"][template_key + '_record'],
            },
        },
    }
    use_store = new_model_data["body"]["dataStores"][unknown_code_use]
    record_store = new_model_data["body"]["dataStores"][unknown_code_record]
    use_store["name"] = unknown_code_use
    use_store["parameters"]["queryds"] = unknown_code_use
    record_store["name"] = unknown_code_record
    record_store["parameters"]["queryds"] = unknown_code_use
    use_store["rowSetName"] = rowSetName
    record_store["rowSetName"] = rowSetName
    json.encode_to_file("./json/new_model.json", new_model_data,
                        overwrite=True)
    model_data = new_model_data

    # A "_record" store exists once the form was saved ("暂存") or
    # submitted; its presence tells us whether today is already signed.
    if unknown_code_record in stores:
        source_record = stores[unknown_code_record]["rowSet"]["primary"][0]
        model_data["body"]["dataStores"][unknown_code_record] = \
            stores[unknown_code_record]
        print("today is " + current_date)
        if source_record['SBSJ_STR'][0:10] == current_date:
            whether_signed = True
    else:
        source_record = stores[unknown_code_use]["rowSet"]["primary"][0]
        del model_data["body"]["dataStores"][unknown_code_record]
        del model_data["body"]["dataStores"][unknown_code_use]["rowSet"][
            "primary"][0]["_o"]

    # Record fields copied verbatim (pinyin abbreviations: ZH=student id,
    # XM=name, XSXB=sex, NL=age, SZDW=school, ZYMC=major, XSLX=type,
    # ZXSJ=phone, FDYXMX=advisor, JJLXR*=emergency contact, LXZT=city,
    # DQSFJJIA=at home?, sheng/shi/quxian=province/city/district,
    # DQJZDZ=address).  Collapses the original 23 copy-paste assignments.
    copy_fields = (
        "ZH", "XM", "XSXB", "NL", "SZDW", "ZYMC", "XSLX", "ZXSJ",
        "FDYXMX", "JJLXRXM", "JJLXRDH", "JJLXRYBRGX", "LXZT", "DQSFJJIA",
        "sheng_TEXT", "sheng", "shi_TEXT", "shi", "quxian_TEXT", "quxian",
        "DQJZDZ",
    )
    target = model_data["body"]["dataStores"][unknown_code_use]["rowSet"][
        "primary"][0]
    for field in copy_fields:
        target[field] = source_record[field]
    target["SBSJ"] = current_date          # report date = today
    target["CLSJ"] = yesterday_date_time   # fill-in time = yesterday 09:00

    model_data["body"]["dataStores"]["variable"] = stores["variable"]
    model_data["body"]["parameters"] = source_data["body"]["parameters"]
    json.encode_to_file("./json/example.json", model_data, overwrite=True)
    return model_data, whether_signed
# NOTE(review): the statements below read `locations` and `links` and look
# like the body of `validate_links(links, locations)` (called further down)
# whose `def` header appears lost from this view; kept at the original
# (module) level rather than guessed at.
valid_locations = [e['name'] for e in locations]
for f in links:
    # BUGFIX: the original `if f['loc1'] or f['loc2'] not in valid_locations`
    # tested the truthiness of f['loc1'] (operator precedence), so every link
    # with a non-empty loc1 was reported.  Check membership of both endpoints.
    if f['loc1'] not in valid_locations or f['loc2'] not in valid_locations:
        for attribute, value in f.items():
            print('{} : {}'.format(attribute, value))


def validate_check_duplicates(locations):
    """Print how many location names occur more than once."""
    seen = set()
    duplicates = 0
    for g in locations:
        if g['name'] in seen:
            duplicates += 1
        else:
            seen.add(g['name'])
    print(duplicates)


locations = open_locations()
create_location("forest", "endless trees", locations)
create_location("swamp", "endless swamp", locations)
links = open_links()
create_link("forest", "swamp", "north", links)
validate_check_duplicates(locations)
validate_links(links, locations)
demjson.encode_to_file("locations.json", locations, overwrite=True)
demjson.encode_to_file("links.json", links, overwrite=True)
def to_file(self, filename):
    """Serialize the wrapped JSON object to *filename* with demjson.

    Creates the parent directory if needed; raises PathNotCreateException
    when it cannot be created.
    """
    directory_ready = CFile.check_and_create_directory(filename)
    if directory_ready:
        demjson.encode_to_file(filename, self.__json_obj, overwrite=True)
    else:
        raise PathNotCreateException(CFile.file_path(filename))
# NOTE(review): the leading statements here read `comment` and `p_time_dict`
# with no visible `def` -- the header of `correct_comment_time(comment)`
# (called below) appears to have been lost when the file was collapsed;
# kept byte-identical rather than guessed at.  Surviving intent: give each
# comment its post's year (rolling forward one year when the comment would
# otherwise predate the post), recurse into "child" comments, then dump
# everything to comments_correct_time.json via demjson.
comment_url = comment["url"] if not (comment_url in p_time_dict.keys()): print("comment_url " + comment_url + " have no post url ") return post_time = datetime.strptime(p_time_dict[comment_url], '%Y-%m-%d %H:%M:%S') comment_time = comment_time.replace(year=post_time.year) if comment_time < post_time: comment_time = comment_time.replace(year=post_time.year + 1) comment["time"] = comment_time.strftime('%Y-%m-%d %H:%M:%S') for comment in comments_list: correct_comment_time(comment) if not ("child" in comment.keys()): continue child_comments = comment["child"] for c_comment in child_comments: correct_comment_time(c_comment) print("Your comments will saved to comments_correct_time.json.") demjson.encode_to_file("comments_correct_time.json", comments_list, 'utf-8', True, indent_amount=2, compactly=False) print( "Finish !!!! Now you can open comments_correct_time.json to see the comments" )