def SendGmail(subject, message, to=None, attach=None):
    """Send a mail through Gmail's SMTP server.

    subject -- mail subject (encoded as UTF-8)
    message -- plain-text body
    to      -- recipient address; defaults to the sending account itself
    attach  -- optional path of a file to attach
    """
    gmail_username = GetTheConfig('google', 'gmail_username')  # sender display name
    gmail_user = GetTheConfig('google', 'gmail_user')          # account address
    gmail_pwd = GetTheConfig('google', 'gmail_pwd')            # account password
    msg = MIMEMultipart('alternative')
    if to is None:  # idiom fix: was `not to == None`
        to = gmail_user
    msg['To'] = to
    msg['From'] = gmail_username
    msg['Subject'] = Header(subject, 'utf-8')  # subject
    msg.attach(MIMEText(message, 'plain', 'utf-8'))  # plain-text body
    # msg.attach(MIMEText(message, 'html', 'utf-8'))  # use instead for an HTML body
    if attach is not None:  # attachment present - used outside normal article distribution
        part = MIMEBase('application', 'octet-stream')
        fp = open(attach, 'rb')
        try:
            part.set_payload(fp.read())
        finally:
            fp.close()  # bug fix: the original leaked this file handle
        Encoders.encode_base64(part)
        part.add_header('Content-Disposition',
                        'attachment; filename="%s"' % os.path.basename(attach))
        msg.attach(part)
    mailServer = smtplib.SMTP("smtp.gmail.com", 587)
    try:
        mailServer.ehlo()
        mailServer.starttls()
        mailServer.ehlo()
        mailServer.login(gmail_user, gmail_pwd)
        mailServer.sendmail(gmail_user, to, msg.as_string())
    finally:
        mailServer.close()  # bug fix: close the connection even if login/send raises
def GetInformation(contentsInformation):
    """Build the YouTube watch URL for a search result.

    Returns a (url, {"url": url, "image": thumbnail}) pair built from the
    result's 'id' and 'image' fields.
    """
    watchUrl = GetTheConfig('youtube', 'base_url') + contentsInformation['id']
    watchUrlDic = {"url": watchUrl, "image": contentsInformation['image']}
    return watchUrl, watchUrlDic
def YoutubeSearch(keyword):
    """Search YouTube for `keyword` via the Data API v3.

    Returns a list of {"Youtube": {"url": ..., "image": ...}} entries,
    one per video result (channels/playlists are skipped).
    """
    resultUrls = []
    resultDic = []
    urls = []
    # API credentials / service handle
    DEVELOPER_KEY = GetTheConfig('youtube', 'DEVELOPER_KEY')
    YOUTUBE_API_SERVICE_NAME = "youtube"
    YOUTUBE_API_VERSION = "v3"
    youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION,
                    developerKey=DEVELOPER_KEY)
    search_response = youtube.search().list(
        q=keyword,
        part="id,snippet",
        maxResults=GetTheConfig('youtube', 'QUANTITY')
    ).execute()
    for search_result in search_response.get("items", []):
        if "youtube#video" == search_result["id"]["kind"]:
            resultUrls, resultDic = ExtractInformation(resultUrls, resultDic,
                                                       search_result)
    # (fix: the dead local `deduplicateUrls = resultUrls` placeholder for a
    # pending dedup pass was removed; the join below already uses resultUrls)
    # Re-join each URL with its metadata dictionary.
    for resultUrl in resultUrls:
        for dicUrl in resultDic:
            if dicUrl['url'] == resultUrl:
                urls.append({"Youtube": dicUrl})
                break
    return urls
def delete_keyword():
    """Delete the keyword row identified by the posted `idx` (login required)."""
    if not session.get('logged_in'):
        abort(401)
    rowId = request.form['idx']
    executeNcommit(GetTheConfig('query', 'DELETE_KEYWORD'), rowId)
    flash(GetTheConfig('string', 'DELETE_KEYWORD'))
    return redirect(url_for('index'))
def RequestSearch(urls, keyword):
    """Append up to QUANTITY Google search result URLs for `keyword` to `urls`.

    The `stop` argument alone is unreliable (the API misbehaves, per the
    original note), so results are also counted manually.
    Always returns `urls`.
    """
    quantity = int(GetTheConfig('google', 'QUANTITY'))
    searchCurs = 0
    for url in search(keyword, stop=quantity):
        urls.append(url)
        searchCurs = searchCurs + 1
        sleep(1)  # throttle requests to avoid being blocked
        if searchCurs == quantity:
            return urls
    # Bug fix (the original's "return issue, 11/24" note): when the result
    # generator was exhausted before reaching the quota, the function fell
    # off the end and returned None; return what was collected instead.
    return urls
def execute_schedule():
    """Start the search scheduler and flash the outcome (login required)."""
    if not session.get('logged_in'):
        abort(401)
    outcome = StartingSchedule()
    if outcome == True:     # scheduler is running
        flash(GetTheConfig('string', 'RUN_SCHEDULE'))
    elif outcome == False:  # nothing saved to schedule
        flash(GetTheConfig('string', 'NON_SAVED'))
    # any other outcome (e.g. None) flashes nothing, as before
    return redirect(url_for('index'))
def scheduler(self, type, job_id):
    """Register the `searching` job with APScheduler.

    type   -- 'interval' (fixed 10-second cycle) or 'cron'
             (day/hour/minute pulled from the [schedule] config section)
    job_id -- identifier passed through to the job
    """
    print("%s Scheduler Start" % type)
    if type == 'interval':
        # short fixed cycle, handy for testing
        self.sched.add_job(self.searching, type, seconds=10,
                           id=job_id, args=(type, job_id))
    elif type == 'cron':
        # production schedule comes from the config file
        self.sched.add_job(self.searching, type,
                           day_of_week=GetTheConfig("schedule", "day_week"),
                           hour=GetTheConfig("schedule", "hour"),
                           minute=GetTheConfig("schedule", "minute"),
                           id=job_id,
                           args=(type, job_id))
def IncrementSearchingQuantity():
    # Called when a search pass produced no (new) data: widen the per-platform
    # search quota, mail the admin, then restart the bot via the CLI manager.
    print "None Data"
    IncrementQuantity()  # increase the search quantity for every platform
    title = "[Increment Searching Quantity] " + now()
    message = "Crehacktive bot Increment Searching Quantity/Standard Increment : " + GetTheConfig( 'manager', 'standard_increment')  # result details to be added later
    SendGmail(title, message, GetTheConfig('google', 'bot_admin'))
    result = subprocess.call('../bin/python ./CLI_Manager -r', shell=True)
    # NOTE(review): subprocess.call returns the child's *integer exit code*,
    # while both config values below are strings, so neither comparison can
    # ever be true and the function falls through returning None. Probably
    # subprocess.check_output (captured stdout) was intended -- confirm.
    if result == GetTheConfig('string', 'RUN_SCHEDULE'):
        return True
    elif result == GetTheConfig('string', 'NON_SAVED'):
        return False
def login():
    """Authenticate the manager against credentials stored in the config file.

    On success sets the session flag and redirects to the index; on failure
    re-renders the login page with the matching error string.
    """
    if request.method != 'POST':
        return render_template('login.html', error=None)
    if request.form['username'] != GetTheConfig('manager', 'USERNAME'):
        return render_template('login.html',
                               error=GetTheConfig('string', 'ERROR_USERNAME'))
    if request.form['password'] != GetTheConfig('manager', 'PASSWORD'):
        return render_template('login.html',
                               error=GetTheConfig('string', 'ERROR_PASSWORD'))
    session['logged_in'] = True
    flash(GetTheConfig('string', 'LOGIN'))
    return redirect(url_for('index'))
def IncrementQuantity():
    """Raise each platform's search quota by the configured step.

    Baidu's step is 9 smaller than the standard increment; YouTube is
    absent because its API caps results at 50.
    """
    base_step = int(GetTheConfig('manager', 'standard_increment'))
    platforms = ['google', 'twitter', 'bing', 'baidu', 'yahoo']
    for platform in platforms:
        step = base_step - 9 if platform == 'baidu' else base_step
        WriteTheConfig(platform, 'quantity',
                       int(GetTheConfig(platform, 'quantity')) + step)
def TwitterDistribute(message):
    """Post `message` to the configured Twitter account.

    Bug fix: the original built the API client and printed the credential
    check but never used `message` at all, so nothing was ever distributed.
    It now publishes the message with python-twitter's PostUpdate.
    """
    # OAuth tokens for the Twitter API
    ACCESS_TOKEN = GetTheConfig('twitter', 'ACCESS_TOKEN')
    ACCESS_SECRET = GetTheConfig('twitter', 'ACCESS_SECRET')
    CONSUMER_KEY = GetTheConfig('twitter', 'CONSUMER_KEY')
    CONSUMER_SECRET = GetTheConfig('twitter', 'CONSUMER_SECRET')
    twitterApi = Twitter.Api(consumer_key=CONSUMER_KEY,
                             consumer_secret=CONSUMER_SECRET,
                             access_token_key=ACCESS_TOKEN,
                             access_token_secret=ACCESS_SECRET)
    print(twitterApi.VerifyCredentials())  # sanity-check the tokens
    print(twitterApi.PostUpdate(message))  # publish the tweet
def FacebookSendPosting(msg="Hello, world!"):
    """Publish `msg` on the configured Facebook page's wall.

    Generalized: the message used to be hard-coded to "Hello, world!";
    it is now a parameter whose default preserves the original behavior,
    so existing callers are unaffected.
    """
    # Credentials gathered in the app-setup steps
    credentials = {
        "page_id": GetTheConfig("facebook", "PAGE_ID"),  # Step 1
        "app_id": GetTheConfig("facebook", "APP_ID"),
        "app_secret_code": GetTheConfig("facebook", "APP_SECRET_CODE"),
    }
    credentials["access_token"] = getAccessToken(credentials)
    api = get_api(credentials)
    status = api.put_wall_post(msg)
    print(status)
def RegularSearch():
    """Run one full search pass over every stored keyword.

    Gathers and saves results per keyword, synchronizes the site table,
    then mails the admin. Returns False as soon as a save fails,
    True on full success.
    """
    rows = executeNfetchall(
        GetTheConfig('query', 'select_keyword_research-keyword'))
    for row in rows:
        gathered = GatheringInformation(row.get('keyword'))
        if SavingSearchData(gathered) == False:
            return False
    SyncSite()  # mirror the freshly searched data into the site table
    title = "[Search Complete] " + now()
    message = "Crehacktive bot Search Complete"  # result details to be added later
    SendGmail(title, message, GetTheConfig('google', 'bot_admin'))
    return True
def FacebookSearch(keyword):
    """Search Facebook pages for `keyword` and collect their post URLs.

    Returns a deduplicated list of {"Facebook": url} entries.
    """
    urls = []
    resultUrls = []
    response = get("https://graph.facebook.com/search?access_token="
                   + access_token + "&q=" + keyword + "&type=page")
    data = response.text.encode('utf-8')
    jsonData = json.loads(data)
    page = 1
    user = jsonData["data"][0]["id"]  # id of the first matching page
    graph = facebook.GraphAPI(access_token)
    profile = graph.get_object(user)
    posts = graph.get_connections(profile['id'], connection_name='posts')
    while True:
        for post in posts['data']:
            # Bug fix: collect the bare URL here. The original appended
            # {"Facebook": url} dicts AND wrapped them again below, which
            # yielded nested {"Facebook": {"Facebook": url}} entries.
            urls.append(GetTheUrl(post=post))
        try:
            posts = get(posts['paging']['next']).json()
        except Exception:  # no 'next' page / request failed (was a bare except)
            posts = False
        finally:
            # Bug fix: `page` is an int but GetTheConfig returns a string
            # (every other call site wraps it in int()), so the quota check
            # could never match; coerce before comparing.
            if not posts or page == int(GetTheConfig('facebook', 'QUANTITY')):
                break
        page = page + 1
    for url in DeduplicateValue(urls):
        resultUrls.append({"Facebook": url})
    return resultUrls
def YahooSearch(keyword):
    """Scrape Yahoo! Japan web search for `keyword`.

    Walks result pages (b=1, 11, 21, ...) up to the configured depth and
    returns a deduplicated list of {"Yahoo": url} entries.
    """
    urls = []
    resultUrls = []
    quantity = GetTheConfig('yahoo', 'QUANTITY')
    page = 1  # Yahoo paginates via the `b` offset parameter
    user_agent = {'User-Agent': choice(user_agent_list)}  # randomized UA
    while True:
        response = get('http://search.yahoo.co.jp/search?p=%s&b=%d' % (keyword, page),
                       headers=user_agent, timeout=5)
        soup = BeautifulSoup(response.text.encode('utf-8'))
        for element in soup.findAll('div', {'class': 'hd'}):
            for anchor in element.find_all('a', href=True):
                urls.append(anchor['href'])
        if page // 10 == int(quantity) // 10:  # reached the configured depth
            break
        page = page + 10
    for url in DeduplicateValue(urls):
        resultUrls.append({"Yahoo": url})
    return resultUrls
def edit_schedule():
    """Persist a new daily schedule hour from the form (login required)."""
    if not session.get('logged_in'):
        abort(401)
    newHour = request.form['time']
    WriteTheConfig('schedule', 'HOUR', newHour)
    flash(GetTheConfig('string', 'EDIT_SCHEDULE'))
    return redirect(url_for('index'))
def BingSearch(keyword):
    """Search Bing for `keyword` up to the configured quota.

    Returns a deduplicated list of {"Bing": url} entries.
    """
    bing_web = PyBingWebSearch(GetTheConfig('bing', 'Key'), keyword,
                               web_only=False)
    results = bing_web.search(limit=int(GetTheConfig('bing', 'QUANTITY')),
                              format='json')
    raw_urls = [result.url for result in results]
    return [{"Bing": url} for url in DeduplicateValue(raw_urls)]
def add_keyword():
    """Store a new search keyword (login required).

    Rejects blank and duplicate keywords by re-rendering the index with
    the matching error string; otherwise inserts and redirects.
    """
    if not session.get('logged_in'):
        abort(401)
    keyword = request.form['keyword']
    if keyword == '':
        return index(error=GetTheConfig('string', 'Blank_KEYWORD'))
    storedKeywords = executeNfetchall(
        GetTheConfig('query', 'SELECT_KEYWORD_RESEARCH'))
    for row in storedKeywords:
        if keyword == row['keyword']:
            return index(error=GetTheConfig('string', 'DUPLICATE_KEYWORD'))
    executeNcommit(GetTheConfig('query', 'INSERT_KEYWORD'),
                   (keyword, 0, now()))
    flash(GetTheConfig('string', 'INSERT_KEYWORD'))
    return redirect(url_for('index'))
def getHtml(url):
    """Fetch `url` and return the raw HTML.

    On a truncated response (httplib.IncompleteRead, which Baidu's
    "online shield" triggers) the admin is mailed and the sentinel string
    "Online Shield" is returned so BaiduSearch can skip the page.
    """
    response = urlopen(url)
    try:
        html = response.read()
    except httplib.IncompleteRead as e:  # `as` form: valid on Py2.6+ and Py3
        page = e.partial
        title = "[Baidu Error] " + now()
        message = "Baidu Error " + page  # result details to be added later
        SendGmail(title, message, GetTheConfig('google', 'bot_admin'))
        html = "Online Shield"  # sentinel checked by BaiduSearch
    # Bug fix: the original had no return statement, so every caller got
    # None and the `html == "Online Shield"` check in BaiduSearch was dead.
    return html
def TwitterSearch(keyword):
    """Search recent tweets for `keyword` and extract their expanded URLs.

    Korean queries are additionally restricted to lang=ko. Returns a
    deduplicated list of {"Twitter": url} entries.
    """
    hangul = re.compile('[ㄱ-ㅣ가-힣]+')  # matches Hangul characters
    # OAuth tokens for the Twitter API
    oauth = OAuth(GetTheConfig('twitter', 'ACCESS_TOKEN'),
                  GetTheConfig('twitter', 'ACCESS_SECRET'),
                  GetTheConfig('twitter', 'CONSUMER_KEY'),
                  GetTheConfig('twitter', 'CONSUMER_SECRET'))
    twitter = Twitter(auth=oauth)
    params = {
        'result_type': 'recent',
        'count': int(GetTheConfig('twitter', 'QUANTITY')),
    }
    if hangul.match(keyword):  # Korean keyword -> Korean-language tweets
        params['lang'] = 'ko'
    params['q'] = keyword
    query = twitter.search.tweets(**params)
    urls = []
    for status in query["statuses"]:
        for entity in status["entities"]["urls"]:
            urls.append(entity[u"expanded_url"])
    return [{"Twitter": url} for url in DeduplicateValue(urls)]
def connect_db(readData="save"):
    """Open a MySQL connection using the [database] config section.

    readData -- "save" (default) connects to the bot DB,
                "distribute" connects to the board DB.
    Raises ValueError for any other value; the original left `DB` unbound
    and crashed later with a confusing NameError instead.
    """
    if readData == "distribute":
        DB = GetTheConfig('database', 'board_DB')
    elif readData == "save":
        DB = GetTheConfig('database', 'bot_DB')
    else:
        raise ValueError("unknown readData value: %r" % (readData,))
    conn = pymysql.connect(host=GetTheConfig('database', 'HOST'),
                           user=GetTheConfig('database', 'USER'),
                           password=GetTheConfig('database', 'PASSWORD'),
                           db=DB,
                           charset=GetTheConfig('database', 'CHARSET'))
    return conn
def BaiduSearch(keyword):
    """Search Baidu for `keyword`, page by page.

    Pages blocked by Baidu's "online shield" (signalled by getHtml's
    sentinel) are skipped. Returns a list of {"Baidu": url} entries,
    deduplicated per result page.
    """
    urls = []
    count = -1
    last = int(GetTheConfig('baidu', 'QUANTITY')) + 1
    storedUrlPattern = re.compile(r'url":"(.*?)"}')
    while count < last:
        count = count + 1
        offset = 10 * count  # Baidu paginates via pn=0,10,20,...
        html = getHtml("http://www.baidu.com/s?wd=%s&pn=%s" % (keyword, str(offset)))
        if html == "Online Shield":  # truncated/blocked fetch: skip this page
            continue
        wrappedUrls = getElement(storedUrlPattern, html)
        realUrls = DeduplicateValue(getRealUrl(wrappedUrls))
        for url in realUrls:
            urls.append({"Baidu": url})
    return urls
def SyncSite():
    """Synchronize the site_research table with the distinct domains found
    in data_research: insert new domains, update hit counts for known ones,
    and mail the admin if a domain appears more than once.
    """
    protocolPattern = re.compile(r'https://|http://|ftp://')
    storedSiteUrls = executeNfetchall(
        GetTheConfig('query', 'select_distinct_siteUrl'))
    for siteUrlDic in storedSiteUrls:
        siteUrl = protocolPattern.sub('', siteUrlDic["siteUrl"])  # strip protocol
        # number of search hits recorded for this domain
        numberOfSite = NumberOfValue(
            GetTheConfig('query', 'count_data_research_siteUrl'), '%' + siteUrl)
        # does site_research already have a row for this domain?
        columnExist = NumberOfValue(
            GetTheConfig('query', 'count_site_research_siteUrl'), siteUrl)
        if columnExist == 0:
            executeNcommit(GetTheConfig('query', 'insert_site'),
                           (siteUrl, numberOfSite, now()))
        elif columnExist == 1:
            executeNcommit(GetTheConfig('query', 'update_site'),
                           (numberOfSite, now(), siteUrl))
        else:
            # duplicate rows for one domain: alert the admin
            title = "[Check Database ] " + now()
            message = "Check database - [site_research] duplicate siteUrl (" + siteUrl + ")"  # result details to be added later
            SendGmail(title, message, GetTheConfig('google', 'bot_admin'))
def SyncSite():
    """Legacy variant of the site_research synchronizer.

    Fixes for consistency with the sibling SyncSite implementation:
      * executeNcommit now receives its parameters as ONE tuple, matching
        every other call site in this file (the original passed them as
        loose positional arguments);
      * the existence/count checks go through NumberOfValue, since
        executeNfetchall returns a row list that was being compared
        directly against 0/1 and could never match.

    NOTE(review): rows yielded by executeNfetchall look like dicts at the
    other call sites; iterating them as plain strings here may need
    row["siteUrl"] instead -- confirm against the query.
    """
    storedSiteUrls = executeNfetchall(GetTheConfig('query', 'distinct_siteUrl'))
    removeProtocol = re.compile(r'https://|http://|ftp://')
    for siteUrl in storedSiteUrls:
        siteUrl = removeProtocol.sub('', siteUrl)  # strip the protocol
        numberOfSite = NumberOfValue(
            GetTheConfig('query', 'count_data_research_siteUrl'), siteUrl)
        columnExist = NumberOfValue(
            GetTheConfig('query', 'count_site_research_siteUrl'), siteUrl)
        if columnExist == 0:
            executeNcommit(GetTheConfig('query', 'insert_site'),
                           (siteUrl, numberOfSite, now()))
        elif columnExist == 1:
            executeNcommit(GetTheConfig('query', 'update_site'),
                           (numberOfSite, now(), siteUrl))
        else:
            title = "[Check Database ] " + now()
            message = "Check database - (site_research) duplicate siteUrl"  # result details to be added later
            SendGmail(title, message, GetTheConfig('google', 'bot_admin'))
def GetTheUrl(post):
    """Return the full Facebook URL for `post` (configured base URL + post id)."""
    base = GetTheConfig('facebook', 'base_url')
    return base + post['id']
# -*- coding: utf-8 -*- # !/usr/bin/python import json import facebook from requests import get from Config import GetTheConfig from app.extract.UniqueValue import DeduplicateValue access_token = GetTheConfig('facebook','app_id') + "|" + GetTheConfig('facebook','app_secret_code') def GetTheUrl(post): return GetTheConfig('facebook','base_url')+ post['id'] def FacebookSearch(keyword): urls = [] resultUrls = [] response = get("https://graph.facebook.com/search?access_token=" + access_token + "&q=" + keyword + "&type=page") data = response.text.encode('utf-8') jsonData = json.loads(data) page = 1 user = jsonData["data"][0]["id"] # 값 하나하나 접근하기 graph = facebook.GraphAPI(access_token) profile = graph.get_object(user) posts = graph.get_connections(profile['id'], connection_name='posts') while True: for post in posts['data']: urls.append({"Facebook":GetTheUrl(post=post)})
def logout():
    """Drop the login flag from the session and return to the index page."""
    session.pop('logged_in', None)  # no-op if not logged in
    flash(GetTheConfig('string', 'LOGOUT'))
    return redirect(url_for('index'))
def SavingSearchData(searchResults):
    """Persist freshly searched URLs into the data_research table.

    searchResults -- list of per-platform result lists; each entry is a
        one-key dict {"Platform": url}, except YouTube entries which carry
        {"Youtube": {"url": ..., "image": ...}}.

    New (not-yet-stored) URLs are matched back to their platform and
    thumbnail, assigned a worker round-robin, and committed. If nothing
    was found, or nothing new remains after deduplication, the search
    quota is widened instead. Always returns True.
    """
    Unspecified = 1  # default status code for a freshly inserted row
    workerIndex = 0
    worker = [u"이승용", u"이상훈", u"김성규", u"하동민"]  # round-robin assignees
    databaseStoredUrls = []
    sourceImageUrls = None
    matchWorkerUrls = []
    imageUrl = None
    if searchResults == []:  # nothing found at all: widen the search quota
        IncrementSearchingQuantity()
    # Pull just the URLs (and YouTube thumbnails) out of the results.
    sourceUrls, sourceImageUrls = ExtractSearchUrl(searchResults)
    # Collect every URL already stored, to keep only new ones.
    StoredSearchUrls = executeNfetchall(
        GetTheConfig('query', 'SELECT_DATA_RESEARCH-RESEARCHURL'))
    for StoredSearchUrl in StoredSearchUrls:
        databaseStoredUrls.append(StoredSearchUrl['researchUrl'])
    UniqueUrls = DeduplicateValue(sourceUrls, databaseStoredUrls)
    if UniqueUrls == []:  # nothing new after dedup: widen the search quota
        IncrementSearchingQuantity()
    for url in UniqueUrls:
        for sourceImageUrl in sourceImageUrls:  # thumbnail image, if any
            if url == sourceImageUrl.keys()[0]:
                imageUrl = sourceImageUrl[url]
                break
        for searchResult in searchResults:  # find the originating platform
            for urlDictionary in searchResult:
                if urlDictionary.keys()[0] == "Youtube":
                    if url == urlDictionary[urlDictionary.keys()[0]]["url"]:
                        platform = urlDictionary.keys()[0]
                        break
                else:
                    # Bug fix: the original compared `url == urlDictionary.values()`
                    # -- a string against a LIST, never true -- so non-YouTube
                    # urls kept whatever `platform` held from a previous
                    # iteration (or raised NameError on the first url).
                    # Compare against the dict's single value instead.
                    if url == urlDictionary[urlDictionary.keys()[0]]:
                        platform = urlDictionary.keys()[0]
                        break
        siteUrl = ExtractDomain(url)  # domain portion for site_research
        matchWorkerUrls.append({
            'worker': worker[workerIndex]  # round-robin assignment
            , 'platform': platform,
            'siteUrl': siteUrl,
            'imageUrl': imageUrl,
            'url': url
        })
        workerIndex = workerIndex + 1
        if workerIndex == 4:
            workerIndex = 0
    # Commit every matched row.
    for matchWorkerUrl in matchWorkerUrls:
        print(matchWorkerUrl)
        executeNcommit(
            GetTheConfig('query', 'INSERT_DATA'),
            (matchWorkerUrl['worker'], '', matchWorkerUrl['siteUrl'],
             matchWorkerUrl['url'], matchWorkerUrl['platform'], Unspecified,
             matchWorkerUrl['imageUrl'], str(now()), str(now('limit'))))
    return True
def index(error=None):
    """Render the dashboard showing the first page of keywords and sites."""
    page = 1       # landing view always shows page 1
    per_page = 10  # rows per page
    keywords = executeNfetchall(GetTheConfig('query', 'SELECT_KEYWORD_RESEARCH'))
    sites = executeNfetchall(GetTheConfig('query', 'SELECT_SITE_RESEARCH'))
    schedule = {"time": GetTheConfig('schedule', 'HOUR')}  # scheduled hour
    # total row counts feeding the paginators
    keywordTotal = executeNfetchall(
        GetTheConfig('query', "keywordTotal"))[0]["count(*)"]
    siteTotal = executeNfetchall(
        GetTheConfig('query', "siteTotal"))[0]["count(*)"]
    # options shared by both paginators
    sharedOptions = dict(
        css_framework=GetTheConfig('manager', 'css_framework'),
        link_size=GetTheConfig('manager', 'link_size'),
        show_single_page=GetTheConfig('manager', 'single_page_or_not'),
        page=page,
        per_page=per_page,
        format_total=True,
        format_number=True,
    )
    keywordPagination = Pagination(total=keywordTotal,
                                   href="page?keywordPage={0}",
                                   record_name='Keyword',
                                   **sharedOptions)
    sitePagination = Pagination(total=siteTotal,
                                href="page?sitePage={0}",
                                record_name='Domain',
                                **sharedOptions)
    return render_template('show_entries.html',
                           keywords=keywords,
                           sites=sites,
                           schedule=schedule,
                           error=error,
                           per_page=per_page,
                           keywordPagination=keywordPagination,
                           sitePagination=sitePagination)
def show_page(error=None):
    """Render the dashboard at arbitrary keyword/site page positions.

    Page numbers come from the query string (?keywordPage=N&sitePage=M);
    each list is fetched with its own LIMIT/OFFSET window.
    """
    keywordPage = int(request.args.get('keywordPage', default=1, type=int))
    sitePage = int(request.args.get('sitePage', default=1, type=int))
    per_page = int(GetTheConfig('manager', 'per_page'))
    keywordList_offset = per_page * (keywordPage - 1)  # SQL offset
    siteList_offset = per_page * (sitePage - 1)        # SQL offset
    keywords = executeNfetchall(
        GetTheConfig('query', 'select_keyword_research_limit').format(
            keywordList_offset, GetTheConfig('manager', 'per_page')))
    sites = executeNfetchall(
        GetTheConfig('query', 'select_site_research_limit').format(
            siteList_offset, GetTheConfig('manager', 'per_page')))
    schedule = {"time": GetTheConfig('schedule', 'HOUR')}  # scheduled hour
    # total row counts feeding the paginators
    keywordTotal = executeNfetchall(
        GetTheConfig('query', "keywordTotal"))[0]["count(*)"]
    siteTotal = executeNfetchall(
        GetTheConfig('query', "siteTotal"))[0]["count(*)"]
    # options shared by both paginators
    sharedOptions = dict(
        css_framework=GetTheConfig('manager', 'css_framework'),
        link_size=GetTheConfig('manager', 'link_size'),
        show_single_page=GetTheConfig('manager', 'single_page_or_not'),
        per_page=per_page,
        format_total=True,
        format_number=True,
    )
    keywordPagination = Pagination(page=keywordPage,
                                   total=keywordTotal,
                                   href="?keywordPage={0}",
                                   record_name='Keyword',
                                   **sharedOptions)
    sitePagination = Pagination(page=sitePage,
                                total=siteTotal,
                                href="?sitePage={0}",
                                record_name='Domain',
                                **sharedOptions)
    return render_template('show_entries.html',
                           keywords=keywords,
                           sites=sites,
                           schedule=schedule,
                           error=error,
                           per_page=per_page,
                           keywordPagination=keywordPagination,
                           sitePagination=sitePagination)