Example #1
0
    def get_urls_pdfs(self):
        """Collect (code, url, pdf) tuples for parts that are not yet crawled.

        Parses the locally saved ``htmlcode.html`` page, looks at every
        ``td`` cell with ``align="left"`` and ``class="part-name PartNumber"``,
        and skips any part whose code already has a row in
        ``product$component_crawl``.

        Returns:
            list[tuple]: ``(code, url, pdf)`` per new part, where ``url`` is
            ``Rohm_Pre_Url`` plus the href of the first div's anchor and
            ``pdf`` is the ``link`` attribute of the second div's anchor
            (``""`` when the second div or its anchor is missing).
        """
        # Raw string: the original literal relied on invalid escape sequences
        # such as "\P" and "\S"; the resulting path is unchanged.
        with open(r"I:\PythonPrj\StandardSpider\Spider\Rohm\ChipResistorNetworks\htmlcode.html", "r",
                  encoding="utf-8") as f:
            content = f.read()
        bs_content = BeautifulSoup(content, "html.parser")
        part_cells = bs_content.find_all(name="td", attrs={"align": "left", "class": "part-name PartNumber"})

        pdfs_urls = []
        # One connection/cursor for the whole scan; closed even on "repeat"
        # rows or on errors (the original leaked both on the `continue` path).
        orcl_con = OracleConnection()
        try:
            cursor = orcl_con.conn.cursor()
            try:
                for cell in part_cells:
                    divs = cell.find_all(name="div")
                    code_link = divs[0].a
                    code = code_link.text

                    # Bind variable instead of string formatting: the code is
                    # scraped text and may contain quotes.
                    # NOTE(review): assumes conn is a cx_Oracle/oracledb
                    # connection (":name" bind style) - confirm.
                    cursor.execute(
                        "select cc_id from product$component_crawl where cc_code = :code",
                        {"code": code})
                    if cursor.fetchone():
                        print("repeat")
                        continue

                    url = Rohm_Pre_Url + code_link.get("href")
                    try:
                        pdf = divs[1].a.get("link")
                    except (IndexError, AttributeError):
                        # No second div, or the div has no anchor.
                        pdf = ""
                    pdfs_urls.append((code, url, pdf))
            finally:
                cursor.close()
        finally:
            orcl_con.conn.close()
        return pdfs_urls
Example #2
0
    def get_product_list(self):
        """Collect detail-page URLs and part codes that are not yet crawled.

        Scans every element returned by ``self.get_all_content()`` for anchors
        whose href contains ``/ac/c/search_num/index.jsp``. The anchor text is
        the part code; codes that already have a row in
        ``product$component_crawl`` are skipped.

        Returns:
            tuple[list, list]: ``(urls, codes)`` as parallel lists, where each
            url is ``Pre_Panasonic_Device_Url`` plus the script path and the
            ``?c=detail&part_no=...`` query taken from the href.
        """
        series_contents = self.get_all_content()
        # Compiled once instead of once per series.
        link_pattern = re.compile(r'/ac/c/search_num/index\.jsp')
        detail_pattern = re.compile(
            r'(/ac/c/search_num/index\.jsp).*?(\?c=detail&part_no=.*$)')

        urls = []
        codes = []
        # One connection/cursor for the whole scan; closed even on "repeat"
        # links or on errors (the original leaked both on the `continue` path).
        orcl_con = OracleConnection()
        try:
            cursor = orcl_con.conn.cursor()
            try:
                for series_content in series_contents:
                    links = series_content.find_all(
                        name="a", attrs={"href": link_pattern})
                    for link in links:
                        code = link.text

                        # Bind variable instead of string formatting: the code
                        # is scraped text and may contain quotes.
                        # NOTE(review): assumes conn is a cx_Oracle/oracledb
                        # connection (":name" bind style) - confirm.
                        cursor.execute(
                            "select cc_id from product$component_crawl where cc_code = :code",
                            {"code": code})
                        if cursor.fetchone():
                            print("repeat")
                            continue

                        rough_url = link.get("href")
                        re_url = detail_pattern.match(rough_url)
                        if re_url is None:
                            # The href does not start with the script path or
                            # lacks the detail query; skip it rather than
                            # crash on `None.group`.
                            print("skip", rough_url)
                            continue
                        url = Pre_Panasonic_Device_Url + re_url.group(1) + re_url.group(2)
                        codes.append(code)
                        urls.append(url)
            finally:
                cursor.close()
        finally:
            orcl_con.conn.close()
        return urls, codes
Example #3
0
 def get_task_id(self):
     """Return the ``cct_id`` of the crawl task identified by ``self.task_code``.

     Looks the task up in ``product$component_crawl_task`` by ``cct_taskid``.

     Returns:
         The first column (``cct_id``) of the matching row.

     Raises:
         TypeError: if no row matches (``fetchone()`` returned ``None``),
             as in the original implementation.
     """
     orcl_conn = OracleConnection()
     try:
         cursor = orcl_conn.conn.cursor()
         try:
             # Bind variable instead of string formatting. str() keeps the
             # comparison a string comparison, as the quoted literal did.
             # NOTE(review): assumes conn is a cx_Oracle/oracledb connection
             # (":name" bind style) - confirm.
             cursor.execute(
                 "select cct_id from product$component_crawl_task where cct_taskid = :task_code",
                 {"task_code": str(self.task_code)})
             row = cursor.fetchone()
         finally:
             cursor.close()
     finally:
         # The original never closed the connection.
         orcl_conn.conn.close()
     return row[0]
Example #4
0
    def __init__(self, task_code):
        """Set up the crawler state for one task.

        Stores the task code, creates a proxy pool and takes one proxy from
        it, makes sure the ``..\\tmp\\`` working directory exists, and opens
        an ``OracleConnection``.

        Args:
            task_code: identifier of the crawl task, stored as-is.
        """
        self.task_code = task_code

        pool = ProxyPool()
        self.proxy_pool = pool
        self.proxy_ip = pool.get()

        tmp_dir = "..\\tmp\\"
        self.path = tmp_dir
        missing = not os.path.exists(tmp_dir)
        if missing:
            os.mkdir(tmp_dir)

        self.db = OracleConnection()
Example #5
0
    def get_code_urls(self, series_url):
        """Collect product URLs of a series that are not yet crawled.

        Fetches every result page of ``series_url``, reads the ``tr`` rows
        whose class is exactly ``odd`` or ``even``, and keeps the products
        whose code has no row in ``product$component_crawl``.

        Args:
            series_url: URL of the series listing; ``&page=<n>`` is appended
                to it to address the individual pages.

        Returns:
            list[str]: ``Panasonic_Pre_Url`` plus the href of each new
            product's anchor.
        """
        def get_pages_urls(url):
            """Return the URLs of pages 0..last for the listing at ``url``."""
            html_analyse = HtmlAnalyse(url, is_proxy=True)
            bs_content = html_analyse.get_bs_contents()
            page_tag = bs_content.find(name="a",
                                       attrs={"title": "到最后一页"},
                                       text="末页 »")
            try:
                rough_page = page_tag.get("href")
                page = re.match(r"/ea/products/.*?page=(\d+)&reset=1",
                                rough_page).group(1)
            except (AttributeError, TypeError):
                # No "last page" link, no href on it, or an href of another
                # shape: treat the listing as a single page.
                page = 0
            return [url + "&page=" + str(i) for i in range(int(page) + 1)]

        # Compiled once instead of once per page.
        row_class = re.compile(u"(^odd$)|(^even$)")

        product_urls = []
        for page_url in get_pages_urls(series_url):
            html_analyse = HtmlAnalyse(page_url)
            bs_contents = html_analyse.get_bs_contents()
            rows = bs_contents.find_all(name='tr', attrs={"class": row_class})
            if not rows:
                continue

            # One connection/cursor per page, opened after the page has been
            # fetched and always closed (the original leaked both whenever a
            # duplicate row hit `continue`).
            orcl_con = OracleConnection()
            try:
                cursor = orcl_con.conn.cursor()
                try:
                    # NOTE(review): the first matching row is skipped, as in
                    # the original - presumably a header row; confirm.
                    for row in rows[1:]:
                        try:
                            model = row.td.a
                            code = model.text
                        except AttributeError:
                            # Row without a cell or anchor: stop processing
                            # this page (original behaviour).
                            break

                        # Deduplicate against already-crawled codes. Bind
                        # variable instead of string formatting: the code is
                        # scraped text and may contain quotes.
                        # NOTE(review): assumes conn is a cx_Oracle/oracledb
                        # connection (":name" bind style) - confirm.
                        cursor.execute(
                            "select cc_id from product$component_crawl where cc_code = :code",
                            {"code": code})
                        if cursor.fetchone():
                            print("repeat")
                            continue

                        href = model.get("href")
                        product_urls.append(Panasonic_Pre_Url + href)
                finally:
                    cursor.close()
            finally:
                orcl_con.conn.close()
        return product_urls
Example #6
0
 def get_task_id(self):
     """Return the ``cct_id`` of the crawl task identified by ``self.task_code``.

     Looks the task up in ``product$component_crawl_task`` by ``cct_taskid``.

     Returns:
         The first column (``cct_id``) of the matching row, or ``None`` when
         no row matches. In that case a message is printed and the call
         sleeps for three seconds before returning.
     """
     orcl_conn = OracleConnection()
     try:
         cursor = orcl_conn.conn.cursor()
         try:
             # Bind variable instead of string formatting. str() keeps the
             # comparison a string comparison, as the quoted literal did.
             # NOTE(review): assumes conn is a cx_Oracle/oracledb connection
             # (":name" bind style) - confirm.
             cursor.execute(
                 "select cct_id from product$component_crawl_task where cct_taskid = :task_code",
                 {"task_code": str(self.task_code)})
             row = cursor.fetchone()
         finally:
             cursor.close()
     finally:
         # The original never closed the connection.
         orcl_conn.conn.close()

     # Explicit "no row" check replaces the bare `except`, which also hid
     # unrelated errors.
     if row is None:
         print("数据为空,请检查任务号,程序即将关闭")
         time.sleep(3)
         return None
     return row[0]
Example #7
0
    def upload(self, filename, pdf_url):
        """Upload a file and record its stored path in the crawl table.

        Posts ``filename`` to the upload service, reads ``path`` from the JSON
        response, and writes it to ``cc_b2c_attach`` on every
        ``product$component_crawl`` row whose ``cc_attach`` equals ``pdf_url``.

        Any exception is printed and swallowed (best effort, as before).

        Args:
            filename: path of the local file to upload.
            pdf_url: original attachment URL used to find the rows to update.
        """
        try:
            with open(filename, 'rb') as file:
                res = requests.post("http://10.10.100.200:9999/file/upload", files={'file': file})
                res_j = res.json()
            print("上传完成")
            # Read the path before connecting so that a missing key cannot
            # leave a connection open.
            uploaded_path = res_j['path']

            db = OracleConnection()
            try:
                cursor = db.conn.cursor()
                try:
                    # Bind variables instead of string formatting: both values
                    # come from outside (service response / scraped URL).
                    # NOTE(review): assumes conn is a cx_Oracle/oracledb
                    # connection (":name" bind style) - confirm.
                    cursor.execute(
                        "update product$component_crawl set cc_b2c_attach = :b2c_attach "
                        "where cc_attach = :attach",
                        {"b2c_attach": uploaded_path, "attach": pdf_url})
                finally:
                    cursor.close()
                db.conn.commit()
            finally:
                # Closed even when execute/commit fails (the original leaked).
                db.conn.close()

        except Exception as e:
            print(e)
0
def kill_session(os_user_name="zhanghy"):
    """Kill the Oracle sessions that hold object locks for an OS user.

    Queries ``v$locked_object`` joined with ``dba_objects`` and ``v$session``
    for the given OS user and issues ``alter system kill session`` for every
    row returned. A failed kill is printed and the loop moves on to the next
    session.

    Args:
        os_user_name: value matched against ``os_user_name``; defaults to
            ``"zhanghy"``, the user the original hard-coded.
    """
    conn = OracleConnection()
    try:
        cursor = conn.conn.cursor()
        try:
            # NOTE(review): assumes conn is a cx_Oracle/oracledb connection
            # (":name" bind style) - confirm.
            cursor.execute(
                "select sess.sid,sess.serial#,lo.oracle_username,lo.os_user_name,ao.object_name,lo.locked_mode from v$locked_object lo, dba_objects ao, v$session sess where ao.object_id = lo.object_id and lo.session_id = sess.sid and os_user_name = :os_user",
                {"os_user": os_user_name})
            sessions = cursor.fetchall()
        finally:
            # The original never closed this lookup cursor.
            cursor.close()

        for session in sessions:
            sid = session[0]
            serial = session[1]
            cursor = conn.conn.cursor()
            try:
                # The session identifier cannot be bound in this statement;
                # both values come from the query above.
                sql = "alter system kill session '{},{}'".format(sid, serial)
                cursor.execute(sql)
            except Exception as e:
                print(e)
                continue
            finally:
                # Closed on failure too (the original leaked it).
                cursor.close()

            print("kill success")
    finally:
        conn.conn.close()
Example #9
0
 def __init__(self):
     """Ensure the temporary directory exists and open the database connection.

     Sets ``self.path`` to the tmp directory and ``self.db`` to a new
     ``OracleConnection``.
     """
     # Every backslash is escaped: the original literal relied on the invalid
     # escape sequences "\P" and "\S". The resulting path is unchanged.
     self.path = "I:\\PythonPrj\\StandardSpider\\tmp\\"
     # Avoids the exists()/mkdir() race and also creates missing parents.
     os.makedirs(self.path, exist_ok=True)
     self.db = OracleConnection()
Example #10
0
"""
    @description:   Kill the Oracle sessions that hold object locks for OS user 'RoyalClown'.
    @author:        RoyalClown
    @date:          2016/11/16
"""
from Lib.DBConnection.OracleConnection import OracleConnection

# Find every session that holds an object lock for OS user 'RoyalClown' and
# kill it. Any error aborts the script, as before, but the cursors and the
# connection are now always closed.
conn = OracleConnection()
try:
    cursor = conn.conn.cursor()
    try:
        cursor.execute(
            "select sess.sid,sess.serial#,lo.oracle_username,lo.os_user_name,ao.object_name,lo.locked_mode from v$locked_object lo, dba_objects ao, v$session sess where ao.object_id = lo.object_id and lo.session_id = sess.sid and os_user_name='RoyalClown'"
        )
        sessions = cursor.fetchall()
    finally:
        # The original never closed this lookup cursor.
        cursor.close()

    for session in sessions:
        sid = session[0]
        serial = session[1]
        cursor = conn.conn.cursor()
        try:
            # The session identifier cannot be bound in this statement; both
            # values come from the query above.
            sql = "alter system kill session '{},{}'".format(sid, serial)
            cursor.execute(sql)
        finally:
            cursor.close()

        conn.conn.commit()
        print("kill success")
finally:
    # The original never closed the connection.
    conn.conn.close()
Example #11
0
 def __init__(self, task_code):
     """Store the task code, ensure the image directory exists, open the DB.

     Args:
         task_code: identifier of the crawl task, stored as-is.
     """
     self.task_code = task_code
     # Every backslash is escaped: the original literal relied on the invalid
     # escape sequence "\i". The resulting path is unchanged.
     self.path = "C:\\img\\"
     # Avoids the exists()/mkdir() race.
     os.makedirs(self.path, exist_ok=True)
     self.db = OracleConnection()