from dio_core.utils import file_util, json_util


def test_get_python_from_file():
    # Collect every record belonging to one job from a temp JSON-lines dump.
    rows = file_util.readRows(
        "/home/changshuai/Temp/5dcdd0a7-8842-4695-8fd8-9553b191d346.tmp")
    data = []
    for row in rows:
        datum = json_util.to_python(row)
        if datum["jobName"] == "yili_ecomm_v2_20190605131709_003_27":
            data.append(datum)
    print(data)

from dio_core.utils import file_util


def getAccordance(
        file_name1: str,
        file_name2: str,
        base_dir="/home/changshuai/PycharmProjects/dio_core/Test/Data/{}"):
    """Compute accuracy as the overlap between two row sets.

    :param base_dir: template for resolving relative file names
    :param file_name1: browser-captured data
    :param file_name2: crawler data
    :return: None; prints the intersection size and the hit rate
    """
    file_name1 = file_name1 if file_name1.startswith("/") else base_dir.format(file_name1)
    file_name2 = file_name2 if file_name2.startswith("/") else base_dir.format(file_name2)
    row1 = set(file_util.readRows(file_name1))
    row2 = set(file_util.readRows(file_name2))
    print("Intersection: {}".format(len(row1 & row2)))
    # Share of browser rows that the crawler also captured.
    print(len(row1 & row2) / len(row1))

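# A minimal usage sketch for getAccordance. The file names below are
# hypothetical and only illustrate relative-vs-absolute resolution: a bare
# name is expanded through base_dir.format(...), an absolute path is used
# as-is.
getAccordance("browser_urls.txt", "/tmp/crawler_urls.txt")
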
from dio_core.utils import file_util


def compareUrls(
        file_name1,
        file_name2,
        base_dir="/home/changshuai/PycharmProjects/dio_core/Test/Data/{}"):
    """Compare two URL sets.

    :param base_dir: template for resolving relative file names
    :param file_name1: URL set 1
    :param file_name2: URL set 2
    :return: None; prints set sizes, intersection size, and Jaccard similarity
    """
    file_name1 = file_name1 if file_name1.startswith("/") else base_dir.format(file_name1)
    file_name2 = file_name2 if file_name2.startswith("/") else base_dir.format(file_name2)
    row1 = set(file_util.readRows(file_name1))
    row2 = set(file_util.readRows(file_name2))
    print("Set 1: {}".format(len(row1)))
    print("Set 2: {}".format(len(row2)))
    print("Intersection: {}".format(len(row1 & row2)))
    # Jaccard similarity: |A & B| / |A | B|
    print(len(row1 & row2) / len(row1 | row2))

from dio_core.network.downloader import Downloader
from dio_core.utils import file_util, time_util

# Kill every job id listed in the file, pausing between requests.
rows = file_util.readRows(
    "/home/changshuai/PycharmProjects/dio_core/Test/Data/kill_job_urls.txt")
for row in rows:
    url = ("http://api.rhino.datatub.com/common/job/kill"
           "?job_id={}&token=5fa92f2a597cc60201780504be1028a7").format(row)
    res = Downloader.get(url)
    print(row, res.text, url)
    time_util.sleep(3)

import time

from dio_core.network.downloader.downloader import Downloader
from dio_core.utils import file_util

rows = list(file_util.readRows(
    "/home/changshuai/PycharmProjects/dio_core/dio_core_test/Data/cli_kill_job_list.txt"))
# rows = ["app_ecomm_all_20190802141514_042_83", "app_ecomm_all_20190802141516_952_91"]

host = "datatub5:20425"
# host = "api.rhino.datatub.com"

# Kill each job by id; log failures and keep going, throttling between calls.
for row in rows:
    try:
        res = Downloader.get(
            "http://{}/common/job/kill?job_id={}&token=5fa92f2a597cc60201780504be1028a7"
            .format(host, row))
        print(res.json())
    except Exception as e:
        print(e)
    time.sleep(1)

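# If transient failures should be retried instead of just logged, a small
# wrapper can centralize the retry/throttle logic. Minimal sketch only:
# kill_job is a hypothetical helper, not part of dio_core.
def kill_job(host, job_id, token, retries=3, pause=1):
    url = "http://{}/common/job/kill?job_id={}&token={}".format(host, job_id, token)
    for attempt in range(retries):
        try:
            return Downloader.get(url).json()
        except Exception as e:
            print("attempt {} failed: {}".format(attempt + 1, e))
            time.sleep(pause)
    return None
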
from dio_core.utils import file_util

# Rows come in consecutive pairs; report every pair whose two lines differ.
rows = file_util.readRows(
    "/home/changshuai/PycharmProjects/dio_core/dio_core_test/Data/cp_3539-changshuai_20190722.data")
lastWord = ""
for ind, row in enumerate(rows):
    if ind % 2 == 0:
        lastWord = row
        continue
    curWord = row
    if curWord != lastWord:
        print(curWord, lastWord)

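# The same pairing can be written with zip over a single iterator, removing
# the index arithmetic. Minimal sketch, assuming the rows really do come in
# consecutive pairs:
it = iter(rows)
for first, second in zip(it, it):  # consumes rows two at a time
    if first != second:
        print(second, first)
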
import time
import traceback

from dio_core.network.downloader.downloader import Downloader
from dio_core.network.downloader.downloader import Setting
from dio_core.utils import file_util, parse_util, json_util
from dio_core_test.utils import text_util

setting = Setting()
setting.headers["Cookie"] = (
    "DSCKID=91aecd4c-9d62-49ad-ae1b-9eb177c787ac; JSESSIONID=5AD6666BE97FEC415491055AFAFA60FE;"
    " seraph.rememberme.cookie=13124%3A5ad60cddb478faeca22570e7f156f07e5138011a; atlassian.xsr"
    "f.token=BP2B-R8C4-N6CQ-HZD4_9ab65e787a0932dd3b1abcc52a792e40d154415c_lin; jira.editor.use"
    "r.mode=wysiwyg")
setting.htmlParse = True

rows = list(file_util.readRows(
    "/home/changshuai/PycharmProjects/dio_core/dio_core_test/Data/JIRA_LIST.txt"))
allMsg = []
for url in rows:
    try:
        res = Downloader.get(url, setting)
        # Pull the activity-panel payload out of the inline JS and unescape it twice.
        repeatsText = text_util.get_first_match(
            res.text, r"WRM._unparsedData\[\"activity-panel-pipe-id\"\]=\"(.*)\";")
        repeats = (repeatsText.encode("utf-8").decode("unicode-escape")
                   .encode("utf-8").decode("unicode-escape").replace("\\/", "/"))
        soup = parse_util.get_bs4_soup(repeats.strip("\""))
        # Strip user-hover profile links so the description reads as plain text.
        [_.extract() for _ in res.soup.select_one("#description-val").select(".user-hover")]
        msgInfo = {
            "title": res.soup.select_one("#summary-val").text.strip(),
            "id": text_util.get_first_match(url, r"/(CP-\d+)").strip(),
        }
        # ASSUMPTION: the source snippet breaks off after "id"; the loop
        # closing below is inferred from the allMsg accumulator and the
        # traceback/time imports.
        allMsg.append(msgInfo)
    except Exception:
        traceback.print_exc()
    time.sleep(1)

from ftplib import FTP

from dio_core.utils import file_util

ftp = FTP()
ftp.set_debuglevel(2)
ftp.connect("120.31.140.156", 21)
ftp.login("datastory", "datastoryFtp@2016")
ftp.cwd("/temp/2012-06-06")
files = ftp.nlst()

rows = file_util.readRows(
    "/home/changshuai/Back/PythonProject/PyWork/data/ftp_file_path.txt")
i = 0  # count of expected files missing from the FTP directory
r = 0  # row index
for row in rows:
    if row not in files:
        i += 1
        print(row, i, r)
    r += 1
print(i)
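
# The membership test above rescans the files list for every row; a set
# difference yields the same missing-file report in one pass. Minimal
# sketch, assuming report order does not matter:
missing = set(rows) - set(files)
print(len(missing))
for name in sorted(missing):
    print(name)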