def item_name_str(url):
    try:
        web_page = urq.urlopen(
            url
        )  # open the URL; urlopen takes a string or a Request object as its parameter
        content = bs(web_page, 'html.parser')
        if url.find('amazon.com') != -1:  # may raise HTTP Error 503: Service Unavailable
            item_name_str_L = content.find(
                id="productTitle").get_text().strip()
            # price lookup; the result is not used in this function
            content.find(id=[
                "priceblock_ourprice", "priceblock_dealprice",
                "priceblock_saleprice"
            ]).get_text()
        elif url.find('bhphotovideo.com') != -1:
            item_name_str_L = content.find(
                class_="title_3bJZzlB3PKkE_8ajs9mroe").get_text().strip()
            content.find(class_=["price_1DPoToKrLP8uWvruGqgtaY"]).get_text()
        elif url.find('bestbuy.com') != -1:  # Operational Time
            item_name_str_L = content.find(
                class_="heading-5 v-fw-regular").get_text().strip()
            content.find(
                class_=["priceView-hero-price priceView-customer-price"
                        ]).get_text()
        elif url.find('apple.com') != -1:  # HTTP Error 403: Forbidden
            item_name_str_L = content.find(
                class_="as-productdecision-header").get_text().strip()
            item_name_str_L = item_name_str_L.replace("Buy", "")
            content.find(
                class_=["as-price-currentprice", "current_price"]).get_text()
        return item_name_str_L
    except Exception as e:
        err_h.error_handler(e)
        return -1
def do_it():
    'do it'
    try:
        html = render_html()
        create_html( html )
    except:
        error_handler()
Example #3
def dates_before_current(indi_list, fam_list):
    line_num = []
    id_list = []
    current = datetime.now()

    for people in indi_list:
        birt = datetime.strptime(people['BIRT'], '%Y-%m-%d')

        try:
            deat = datetime.strptime(people['DEAT'], '%Y-%m-%d')
        except ValueError:
            # no valid death date recorded; default to yesterday so the check below cannot fire
            deat = datetime.now() - timedelta(1)

        if birt >= current or deat >= current:
            line_num.append(error_handler(people['INDI']))
            id_list.append(people['INDI'])

    for families in fam_list:
        marr = datetime.strptime(families['MARR'], '%Y-%m-%d')

        try:
            div = datetime.strptime(families['DIV'], '%Y-%m-%d')
        except ValueError:
            div = datetime.now() - timedelta(1)

        if marr >= current or div >= current:
            line_num.append(error_handler(families['FAM']))
            id_list.append(families['FAM'])

    if line_num:
        print('ERROR: INDIVIDUAL: US01: lines_num:', sorted(set(line_num)),
              ': indi_id:', sorted(set(id_list)),
              ': Dates must be before the current date!')
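For reference, a minimal sketch of the record layout these checks appear to assume, with field names taken from the code above and purely made-up values:

indi_list = [
    {'INDI': 'I1', 'BIRT': '1990-05-01', 'DEAT': 'NONE', 'AGE': 31,
     'SEX': 'M', 'SPOUSE': ['F1']},
]
fam_list = [
    {'FAM': 'F1', 'HUSB': 'I1', 'WIFE': 'I2', 'MARR': '2015-06-20', 'DIV': 'NONE'},
]

dates_before_current(indi_list, fam_list)  # silent here: every date is in the past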
Example #4
def process_user( user, utas, database, smtpserver ):
    'process one user'
    try:
        uta          = utas[ user[ 'send_index' ] ]
        mail_address = user[ 'mail_address' ]
        subject      = create_subject( uta )
        body         = create_body( user[ 'direction' ], uta,
                                    user[ 'column' ], user[ 'row' ] )
        mail         = create_message( body, subject, mail_address )
        if send_mail( mail, mail_address, smtpserver ):
            increment_send_index( database, mail_address )
    except:
        error_handler()
Example #5
    def call_error_handler(self, kblang):
        error_handler_input = {}
        error_handler_input['cTopics'] = kblang['cTopics']
        error_handler_input['intent'] = kblang['dialogAct'][-1]
        error_handler_input['concept'] = kblang['concept']
        error_handler_output = error_handler.error_handler(error_handler_input)

        unCertain = False
        for i in error_handler_output:
            if i['inContext'] == 'Uncertain':
                kblang['pTopic'] = i['label']
                kblang['dialogAct'].append('CheckQuestion')
                concept = False
                with open('./dict.errorhandler', 'r') as f:
                    reader = csv.reader(f)
                    for line in reader:
                        if line[0] == kblang['property']:
                            concept = line[1]
                            prop = line[2]
                            break
                if concept:
                    kblang['concept'] = concept
                    kblang['property'] = prop
                unCertain = True
                break

        return kblang, unCertain
Example #6
def do_it():
    'do it'
    try:
        database   = database_open()
        day, hour  = get_day(), int( sys.argv[ 1 ] )
        users      = get_user_by_date( database, day, hour )
        if not users:
            database_close( database )
            return
        utas       = get_uta_by_indices(
            database, [ user[ 'send_index' ] for user in users ] )
        smtpserver = get_smtpserver()
        for user in users:
            process_user( user, utas, database, smtpserver )
        database_close( database )
        smtpserver.close()
    except:
        error_handler()
Example #7
def correct_gender_role(indi_list, fam_list):
    line_num = []
    id_list = []

    for families in fam_list:
        husb_id = families['HUSB']
        wife_id = families['WIFE']

        for people in indi_list:

            if people['INDI'] == husb_id and people['SEX'] != 'M':
                line_num.append(error_handler(families['HUSB']))
                id_list.append(husb_id)

            if people['INDI'] == wife_id and people['SEX'] != 'F':
                line_num.append(error_handler(families['WIFE']))
                id_list.append(wife_id)

    if line_num:
        print('ERROR: INDIVIDUAL: US21: lines_num:', sorted(set(line_num)),
              ': indi_id:', sorted(set(id_list)),
              ': Gender and role are not correct!')
def process_1(url, user_price, mail_recipient_input):
    save_url_str = url
    save_item_name_str = item_name_str(url)
    if save_item_name_str == -1:
        return -1
    save_user_price_int = user_price_int(user_price)
    if save_user_price_int == -1:
        return -1
    save_recipients_str = mail_recipient_input
    if save_recipients_str.find('@') == -1:
        err_h.error_handler('recipients_err')
        return -1
    conn = sql.connect('tracking_item.db')
    c = conn.cursor()

    try:
        # create the table on first run
        c.execute("""CREATE TABLE tracking_item (
            url_str text,
            item_name_str text,
            user_price_int integer,
            recipients_str text
            )""")
    except sql.OperationalError:
        pass  # table already exists

    c.execute(
        """INSERT INTO tracking_item VALUES (:url_str,
	:item_name_str,:user_price_int, :recipients_str)""", {
            'url_str': save_url_str,
            'item_name_str': save_item_name_str,
            'user_price_int': save_user_price_int,
            'recipients_str': save_recipients_str
        })

    conn.commit()
    conn.close()
    return 1
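A minimal usage sketch for the tracking pipeline above; the URL, target price, and address are placeholders, and item_name_str hits the live page, so the outcome depends on the site:

if process_1('https://www.amazon.com/dp/EXAMPLE', '499', 'user@example.com') == 1:
    print('tracking item stored')              # row written to tracking_item.db
else:
    print('invalid input or scraping failed')  # one of the helpers returned -1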
Example #9
def less_than_150(indi_list):
    indi_id = []
    line_num = []

    for people in indi_list:

        if people['AGE'] >= 150:
            line_num.append(error_handler(people['INDI']))
            indi_id.append(people['INDI'])

    if line_num:
        print('ERROR: INDIVIDUAL: US07: lines_num:', sorted(set(line_num)),
              ': indi_id:', sorted(set(indi_id)),
              ': Age must be less than 150 years old')
        return 'BUG'
Example #10
def marr_before_div(fam_list):
    line_num = []
    fam_id = []

    for families in fam_list:

        if families['DIV'] != 'NONE':
            marr = datetime.strptime(families['MARR'], '%Y-%m-%d')
            div = datetime.strptime(families['DIV'], '%Y-%m-%d')

            if marr >= div:
                line_num.append(error_handler(families['FAM']))
                fam_id.append(families['FAM'])

    if line_num:
        print('ERROR: FAMILY: US04: lines_num:', sorted(set(line_num)),
              ': fam_id:', sorted(set(fam_id)),
              ': Marriage date must be before divorce date!')
Example #11
    def locateChild(self, ctx, segments):
        ctx.remember(fourohfour.fourohfour(), inevow.ICanHandleNotFound)
        ctx.remember(error_handler.error_handler(), inevow.ICanHandleException)
        request = inevow.IRequest(ctx)
        request.setHeader('server', "AZTK - %s" % socket.gethostname())

        re_FEEDS = re.compile(r"^\/[a-z][_a-z0-9]{3,}\/feeds.*$")
        re_AVATAR = re.compile(r"^\/[a-z][_a-z0-9]{3,}\/avatar.*$")
        re_IMAGE = re.compile(
            r"^\/[a-z][_a-z0-9]{3,}\/img\/(?:(?:\d+(?:x\d+)?(?:x\d)?)|(?:original))\/([a-z0-9]{32})(?:-[a-f0-9]{5})?\.jpg$"
        )

        if segments[0] == "css":
            return static_css(), segments[1:]
        elif segments[0] == "image":
            return static_image(), segments[1:]
        elif segments[0] == "download":
            return static_download(), segments[1:]
        elif segments[0] == "js":
            return static_js(), segments[1:]
        elif segments[0] == "RPC2":
            return self.zapi_handler, []
        elif segments[0] == "browser_check":
            return browser_check.browser_check(), segments[1:]
        elif segments[0] in static_files.files.keys():
            return static_files(), segments
        elif re_AVATAR.match(request.uri):
            return main_homepage.main_homepage(), segments
        elif re_FEEDS.match(request.uri):
            return main_homepage.main_homepage(), segments
        elif segments[0] == "qoop":
            return main_homepage.main_homepage(), segments
        else:
            if request.getCookie('browser_checked') or re_IMAGE.match(
                    request.uri):
                # everything passed, show page
                return main_homepage.main_homepage(), segments
            else:
                # no client side browser_checked cookie found
                # set a cookie server side to remember the segments of the original request.
                # redirect them (hit this page again) for segment == browser_check above to handle.
                #				request.addCookie("requested_page", '/'.join(segments), None, self.app.servers.httpserver._cfg_site_domain, "/")
                #return redirectTo("/browser_check/", request), []
                return browser_check.browser_check(), segments[1:]
Example #13
def birt_before_marr(indi_list, fam_list):
    indi_id = []
    line_num = []

    for people in indi_list:

        if people['SPOUSE'] != 'NONE':
            birt = datetime.strptime(people['BIRT'], '%Y-%m-%d')

            for spouses in people['SPOUSE']:
                find_fam_index = int(re.sub(r'\D', '', spouses)) - 1
                marr = datetime.strptime(fam_list[find_fam_index]['MARR'],
                                         '%Y-%m-%d')

                if marr <= birt:
                    line_num.append(error_handler(people['INDI']))
                    indi_id.append(people['INDI'])

    if line_num:
        print('ERROR: INDIVIDUAL: US02: lines_num:', sorted(set(line_num)),
              ': indi_id:', sorted(set(indi_id)),
              ': Birth date must be before marriage date')
        return 'BUG'
def user_price_int(user_price):
    try:
        return int(user_price)
    except Exception as e:
        err_h.error_handler(e)
        return -1
Example #15
def everytime_all_board(URL, end_date, db):
    main_url = URL['url']
    board_search_url = "https://everytime.kr/community/search?keyword="
    board_search_word = ['게시판', '갤러리']  # search keywords: "board", "gallery"
    board_list = []
    # connect the driver
    try:
        driver = chromedriver()
        driver = everytime.login(driver)
    except Exception as e:
        error_handler(e, URL, main_url, db)
        return
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "a.article")))
    html = driver.page_source
    bs = BeautifulSoup(html, 'html.parser')
    # scrape the dynamic boards from Everytime's top menu =====================================================
    board_group_list = bs.find("div", {
        "id": "submenu"
    }).findAll('div', {"class": "group"})
    for board_group in board_group_list:
        try:
            board_li_list = board_group.find("ul").findAll("li")
            for board_li in board_li_list:
                board_li_dic = {}
                board_li_dic['tag'] = board_li.find("a").text
                if board_li.find("a").text.strip() == "더 보기":
                    continue
                else:
                    board_li_dic['url'] = main_url + board_li.find("a")['href']
                if (board_li_dic['tag'].find("찾기") != -1):
                    continue
                board_list.append(board_li_dic)
        except:
            continue
    # scrape additional dynamic boards found via Everytime search
    for search_word in board_search_word:
        try:
            board_search_url_done = board_search_url + search_word
            driver.get(board_search_url_done)
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "a.result")))
            html = driver.page_source
            bs = BeautifulSoup(html, 'html.parser')
            board_a_list = bs.find("div", {
                "class": "searchresults"
            }).findAll('a')
            for board_a in board_a_list:
                board_li_dic = {}
                board_li_dic['tag'] = board_a.find("h3").text
                board_li_dic['url'] = main_url + board_a.get('href')
                board_list.append(board_li_dic)
        except:
            continue
    #===========================================================================================================
    # loop over the dynamic boards
    for board in board_list:
        page = 1
        page_flag = 0
        board_url = board['url']
        page_url = Change_page(board_url, page)  # returns the post-list url for the current page
        print("\nTarget : ", URL['info'], " :: ", board['tag'])
        continue_handler(URL['info'] + " :: " + board['tag'], URL, page_url)
        # page loop
        while True:
            if page_flag == 50:
                page_flag = 0
                driver.quit()
                time.sleep(3)
                driver = chromedriver()
                driver = everytime.login(driver)
            try:
                print("page_url :::: ", page_url)  #현재 url 출력
                print("Page : ", page)  #현재 페이지 출력
                post_urls = Parsing_list_url(main_url, page_url, driver, db)
                # retry once to work around Everytime's chronic empty-result issue
                if len(post_urls) == 0:
                    time.sleep(2)
                    post_urls = Parsing_list_url(main_url, page_url, driver,
                                                 db)
                post_data_prepare = []
                # post loop
                for post_url in post_urls:
                    get_post_data = Parsing_post_data(driver, post_url, URL,
                                                      board['tag'], db)
                    if get_post_data == "error":
                        break
                    title = get_post_data[1]
                    date = get_post_data[2]
                    print(date, "::::", title)  #현재 크롤링한 포스트의 date, title 출력
                    #게시물의 날짜가 end_date 보다 옛날 글이면 continue, 최신 글이면 append
                    if str(date) <= end_date:
                        continue
                    else:
                        post_data_prepare.append(get_post_data[0])
                add_cnt = db_manager(URL, post_data_prepare, db)
                print("add_OK : ", add_cnt)  #DB에 저장된 게시글 수 출력
                #DB에 추가된 게시글이 0 이면 break, 아니면 다음페이지
                if add_cnt == 0:
                    page_flag = 0
                    break
                else:
                    page_flag += 1
                    page += 1
                    page_url = Change_page(board_url, page)
            except Exception as e:
                error_handler(e, URL, page_url, db)
                driver.quit()
                time.sleep(3)
                driver = chromedriver()
                driver = everytime.login(driver)
                break
    # disconnect the driver
    driver.quit()
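The URL argument threaded through these crawlers appears to be a plain dict; a rough sketch based on the keys accessed here and in the Crawling function below ('url', 'info', 'login'), with hypothetical values:

URL = {
    'url': 'https://everytime.kr',  # base address of the target site
    'info': 'sj34_everytime',       # crawler-module prefix + board label
    'login': 1,                     # 1 if the board requires a login, else 0
}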
def Crawling(URL, db):
	driver = None
	info_name = URL['info'].split('_')
	crawling_name = info_name[0]	# select which board crawler to use
	page = 1
	main_url = URL['url']	# board url; used when switching pages
	page_url = eval(crawling_name + '.Change_page(main_url, page)')	# returns the post-list url for the current page
	end_date = date_cut(URL['info'])	# extract end_date
	if crawling_name in ["sj34"]:		# 동적 게시판 예외
		sj34.everytime_all_board(URL, end_date, db)
		return
	if crawling_name in ["sj20"]:		# 제외 게시판
		return;

	# print the info of the board currently being crawled
	print("Target : ", URL['info'])
	continue_handler(URL['info'], URL, page_url)

	# decide whether to crawl this board
	if is_crawling(db, URL['info']) == False:
		return

	while True:
		if crawling_name in ["sj23", "sj26", "sj27", "sj28", "sj30", "sj44"]:
			lastly_post = get_lastly_post(URL, db)
		try:
			print("\npage_url :::: ", page_url)	#현재 url 출력
			print("Page : ", page)				#현재 페이지 출력
			#driver_page 생성---------------------------
			if crawling_name in ['sj10']:
				driver_page = URLparser_EUCKR(page_url)
			elif crawling_name in ['sj12']:
				driver_page = URLparser_UTF8(page_url)
			else:
				driver_page = URLparser(page_url)
			#-------------------------------------------
			# boards that use Selenium ----------------------------------------------------------------------------------------------
			if crawling_name in ["sj23", "sj26", "sj27", "sj28", "sj29", "sj38", "sj44"]:
				data = eval(crawling_name + '.Parsing_list_url(URL, page_url)')
				driver = data[0]
				post_urls = data[1]
			elif crawling_name in ["sj30"]:#---------------------------세종대역 예외처리
				data = eval(crawling_name + '.Parsing_list_url(URL, page_url, lastly_post, db, driver)')
				driver = data[0]
				post_urls = data[1]
			# boards that use Requests ----------------------------------------------------------------------------------------------
			else:
				# boards that require login -------------------------------------------------------------------------------
				if URL['login'] == 1:
					post_urls = eval(crawling_name + '.Parsing_list_url(URL, page_url)')
				# boards that do not require login ---------------------------------------------------------------------------
				else:
					if driver_page is None:		# break on connect failure
						error_handler("driver_none", URL, page_url, db)
						break
					else:
						# parser type --------------------------------------------------
						if crawling_name in ['sj10']:
							bs_page = BeautifulSoup(driver_page, 'lxml')
						else:
							bs_page = BeautifulSoup(driver_page, 'html.parser')
						#--------------------------------------------------------------
					post_urls = eval(crawling_name + '.Parsing_list_url(URL, bs_page)')
				#-----------------------------------------------------------------------------------------------
			#-----------------------------------------------------------------------------------------------------------------
			# get_post_data format: [post-info dictionary, title, date]-------------------------------------------------------------------------------------------------------
			# the date format is "0000-00-00 00:00:00"
			post_data_prepare = []
			for post_url in post_urls:
				# Selenium case --------------------------------------------------------------------------------------------------------------------
				if crawling_name in ['sj29', 'sj30']:#------------------ boards that follow the standard layout
					get_post_data = eval(crawling_name + '.Parsing_post_data(driver, post_url, URL)')
				#--------------------------------------------------------------------------------------------------- boards that do not follow the standard layout
				elif crawling_name in ['sj23', 'sj26', 'sj27', 'sj28', 'sj44']:
					data = eval(crawling_name + '.Parsing_post_data(driver, post_url, URL, lastly_post)')
					post_data_prepare = data[0]
					lastly_post = data[1]
					if lastly_post is None:
						pass
					else:
						push_lastly_post(URL, lastly_post, db)
				# Requests case --------------------------------------------------------------------------------------------------------------------
				else:
					# create driver_post --------------------------------
					if crawling_name in ["sj21", "sj4", "sj5", "sj8", "sj16"]: #--- boards where driver_post is not needed
						pass
					elif crawling_name in ['sj10', 'sj33']:
						driver_post = URLparser_EUCKR(post_url)
					elif crawling_name in ['sj12']:
						driver_post = URLparser_UTF8(post_url)
					else:
						driver_post = URLparser(post_url)
					#------------------------------------------------
					#----------------------------------------------------------------------------------------------- Wikipedia-style structure
					if crawling_name in ['sj21']:
						get_post_data = eval(crawling_name + '.Parsing_post_data(post_url, URL)')
					#----------------------------------------------------------------------------------------------- non-standard board structure
					elif crawling_name in ["sj4", "sj5", "sj8", "sj16"]:
						post_data_prepare = eval(crawling_name + '.Parsing_post_data(post_url, URL)')
						break
					#----------------------------------------------------------------------------------------------- standard board structure
					else:
						if driver_post is None:		# bail out on connect failure
							error_handler("driver_none", URL, page_url, db)
							break
						else:
							# parser type -------------------------------------------
							if crawling_name in ['sj10']:
								bs_post = BeautifulSoup(driver_post, 'lxml')
							elif crawling_name in ['sj12']:
								bs_post = driver_post
							else:
								bs_post = BeautifulSoup(driver_post, 'html.parser')
							#-------------------------------------------------------
						get_post_data = eval(crawling_name + '.Parsing_post_data(bs_post, post_url, URL)')
				#-----------------------------------------------------------------------------------------------------------------------------------
				
				# post_data_prepare is already complete -----------------------------------------------------------------------
				if crawling_name in ["sj4", "sj5", "sj8", "sj16", "sj23", "sj26", "sj27", "sj28", "sj44"]:
					pass
				# post_data_prepare is not yet complete ---------------------------------------------------------------------
				else:
					if get_post_data is None:	# invalid post data
						continue
					title = get_post_data[1]
					date = get_post_data[2]
		
					print(date, "::::", title)	#현재 크롤링한 포스트의 date, title 출력
		
					# skip posts older than end_date; append newer ones
					if str(date) <= end_date:
						continue
					else:
						post_data_prepare.append(get_post_data[0])
			#----------------------------------------------------------------------------------------------------------
			#--------------------------------------------------------------------------------------------------------------------------------------------------------------
			add_cnt = db_manager(URL, post_data_prepare, db)
			print("add_OK : ", add_cnt)	#DB에 저장된 게시글 수 출력
		
			# quit the driver [when Selenium was used]
			if crawling_name in ["sj23", "sj26", "sj27", "sj28", "sj29", "sj30", "sj38", "sj44"]:
				driver.quit()
			
			# break if no posts were added to the DB, otherwise move to the next page
			if add_cnt == 0:
				break
			else:
				page += 1
				page_url = eval(crawling_name + '.Change_page(main_url, page)')
		# Error handler: if crawling fails, log the error and stop crawling.
		except Exception as e:
			error_handler(e, URL, page_url, db)
			break
        for URL in URLS:  # each element of the list is a URL
            if not (URL['info'] in INFO_LIST):
                print('URL parsing Skip! : ' + str(URL["url"]))
                print(
                    '-----------------------------------------------------------------------------------------------------------------\n'
                )
                continue
            try:
                print('URL parsing Start! : ' + str(URL["url"]))
                Crawling(URL, db)
                print(
                    '-----------------------------------------------------------------------------------------------------------------\n'
                )
            except Exception as e:
                error_handler(e, URL, URL["url"], db)
                continue

        print(":::: Posts in Boards Count ::::")
        posts_cnt(db)  # print the post counts for all boards

        print("\n\nCrawling End!\n\n")

        # program end time
        end_time = datetime.now()
        try:
            log_write(start_time, end_time, db, BEFORE_DATA)
        except Exception as e:
            error_logging(e, '', '', db)

        # update the crawler manager ==========================
Example #18
import pickle
import time
import sys
from datetime import date

from Adafruit_BNO055 import BNO055

from Packet import Packet
import HID
import profiler
import error_handler

com = Packet()
profile = profiler.profiler()
enable_profile = True  # set to True to enable profiling features
errors = error_handler.error_handler()  # used to store error messages

###############################################################################
# set up the gamepad
###############################################################################

hid_enable = True
hid = object
try:
    hid = HID.Gamepad(-100, 100)
except:
    error = ("ERROR: Failed to start gamepad interface, continuing anyways." +
             "Program operation will be severly limited." +
             "Check that all tether cables are securly connected and that " +
             "the gamepad is plugged in to the tether box.")
    errors.add(error)
Example #19
def Parsing_post_data(driver, post_url, URL, board_tag, db):
    return_data = []
    post_data = {}
    domain = Domain_check(URL['url'])

    try:
        driver.get(post_url)
    except:
        try:
            time.sleep(3)
            driver.get(post_url)
        except:
            return "error"

    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR,
                 "time.large")))  #time.large를 발견하면 에이작스 로딩이 완료됬다는 가정
    except:
        try:
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, "time.large")))
        except Exception as e:
            error_handler(e, URL, post_url, db)
            return "error"

    html = driver.page_source
    bs = BeautifulSoup(html, 'html.parser')
    if bs.find("h2", {"class": "large"}) != None:
        title = bs.find("h2", {"class": "large"}).get_text(" ", strip=True)
    else:
        title = "0"
    author = "0"
    date = bs.find("time").text.strip()
    date = everytime_time(date)
    post = bs.find("p", {'class': "large"}).get_text(" ", strip=True)
    post = post_wash(post)  # strip all extra whitespace from the post body
    if bs.find("figure", {"class": "attach"}) is not None:
        try:
            img = bs.find("figure", {
                "class": "attach"
            }).find("img")['src']  #게시글의 첫번째 이미지를 가져옴.
            if 1000 <= len(img):
                img = 5
            else:
                if img.startswith("http://") or img.startswith(
                        "https://"):  # img가 내부링크인지 외부 링크인지 판단.
                    pass
                elif img.startswith("//"):
                    img = "http:" + img
                else:
                    img = domain + img
        except:
            img = 5
    else:
        img = 5
    if img != 5:
        if img_size(img):
            pass
        else:
            img = 5
    img = 5

    post_data['title'] = title.upper()
    post_data['author'] = author.upper()
    post_data['date'] = date
    post_data['post'] = post.lower()
    board_tag = re.compile('[^ ㄱ-ㅣ가-힣|a-z]+').sub('', board_tag.lower())
    for remove_tag in SJ34_DELETE_TAGS:
        board_tag = board_tag.replace(remove_tag, "")
    tag_done.append(board_tag)
    post_data['img'] = img
    post_data['url'] = post_url
    post_data['info'] = URL['info'].split("_")[1] + "_" + board_tag
    if post_data["title"] == "0":
        post_data["title"] = post_data["post"][:30] + "..."

    return_data.append(post_data)
    return_data.append(post_data['title'])
    return_data.append(date)
    return return_data