def process_thread_screenshot(self, post_type, post_id):
    try:
        # show_debug('Process take screenshot ...' + post_id)
        link = MongodbClient.get_instance().get_link_collection().find_one(
            {'link_id': post_id})
        if link:
            data = {'processing_screenshot': 0}
            screenshot = self.selenium_types[post_type].screen_post(
                self, post_id)
            if screenshot:
                data['screenshot'] = screenshot
            MongodbClient.get_instance().get_link_collection().update_one(
                {'_id': link['_id']}, {'$set': data})
            data = {
                'link_id': get_master_attr('link_id', link, None),
                'type': get_master_attr('type', link, None),
                # prefer the screenshot captured just now; the link doc was
                # fetched before the update and may hold a stale value
                'screenshot': screenshot if screenshot else get_master_attr('screenshot', link, None)
            }
            hook_url = link['hook_url']
            requests.post(hook_url, data)
        else:
            show_debug('NOT FOUND LINK')
    except Exception as e:
        print('error code: #117228')
        print(format(e))
def listen(self):
    while True:
        try:
            data = _recev(self.client)
            if not data:
                show_warning('No data received')
                break
            if "action" in data:
                if data['action'] == 'notify' and data['ref'] == 'subscribed':
                    show_notify("Subscribed successfully")
                if data['action'] == 'assign':
                    show_text('=== NEW TASK ASSIGN ===')
                    show_debug('Received assign task with link %s' %
                               get_master_attr('params.link_id', data, None))
                    self.do_assign(data['params'])
                if data['action'] == 'live':
                    _send(self.client, {'action': 'live', 'status': True})
        except ConnectionError as err:
            show_warning("Connection error: {0}".format(err))
        except Exception as err1:
            show_warning("Unexpected error: {0}".format(err1))
            self.client.close()
            break
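# Illustrative messages this listener handles (field names taken from the code
# above; concrete values are made up):
#   {'action': 'notify', 'ref': 'subscribed'}                   -> subscription confirmed
#   {'action': 'assign', 'params': {'link_id': 'abc123', ...}}  -> dispatched to do_assign()
#   {'action': 'live'}                                          -> replies {'action': 'live', 'status': True}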
def process_response(self, result):
    show_debug('processing response ...')
    link = self.mongodb.get_link_collection().find_one(
        {'link_id': result['data']['link_id']})
    collection_history = self.mongodb.get_link_history_collection()
    if link:
        item = {
            'profile': get_master_attr('data.profile', result, None),
            'likes': result['data']['likes'],
            'comments': result['data']['comments'],
            'post_created_time': result['data']['created_time'],
            'updated_at': result['data']['updated_at']
        }
        # queue a screenshot of the Instagram post
        Selenium.get_instance().screen_post('IG', result['data']['link_id'])
        item['processing_screenshot'] = 1
        item['screenshot'] = None
        res = self.mongodb.get_link_collection().update_one(
            {'_id': link['_id']}, {'$set': item})
        item['link_id'] = result['data']['link_id']
        collection_history.insert_one(item)
        if res:
            return 1
        return 0
    return -1
def process_crawler_links(self, connection, request_info):
    self.init_result()
    method = request_info['method']
    if method == 'POST':
        data = request_info['data']
        crawler_type = get_master_attr('type', data, None)
        if crawler_type == 'timeline':
            print_header_log()
            start_schedule()
        if crawler_type == 'links':
            def process_list_job(arr):
                for j in arr:
                    job(j)

            def process_jobs(link_social):
                print('Total links:', len(link_social))
                # split the job list in half and crawl both halves in parallel
                half = int(len(link_social) / 2)
                x = threading.Thread(target=process_list_job,
                                     args=(link_social[:half], ))
                y = threading.Thread(target=process_list_job,
                                     args=(link_social[half:], ))
                x.start()
                y.start()
                send_http_json_result(connection, {
                    'error': False,
                    'msg': 'Success'
                })

            links = get_master_attr('links', data, [])
            crawler_params = []
            for link in links:
                link_info = process_take_info_link(link)
                if link_info is not None:
                    crawler_params.append({
                        'link_id': link_info['link_id'],
                        'type': link_info['type']
                    })
            process_jobs(crawler_params)
        send_http_json_result(connection, {
            'error': False,
            'msg': 'Success'
        })
def process_request(self, data):
    result = {
        'error': True,
        'msg': None,
        'data': None,
        'ref': YoutubeLink.get_name()
    }
    url = 'https://www.googleapis.com/youtube/v3/videos?part=statistics&id=%s&key=%s' % (
        data['link_id'], ServerConfig.API_YTB_KEY.value)
    proxy = get_master_attr('proxy', data, None)
    s = requests.Session()
    if proxy:
        s.proxies = {"https": proxy, "http": proxy}
    try:
        show_debug('Call request: %s' % url)
        response = s.get(url, timeout=10)
    except requests.ConnectionError as err:
        show_warning(format(err))
        result['type'] = 'requests'
        result['msg'] = str(err)
    except requests.HTTPError as err:
        show_warning(format(err))
    else:
        d = response.json()
        if 'error' not in d:
            result['error'] = False
            result['data'] = {
                'link_id': data['link_id'],
                'likes': get_master_attr('items.0.statistics.likeCount', d, None),
                'dislikes': get_master_attr('items.0.statistics.dislikeCount', d, None),
                'views': get_master_attr('items.0.statistics.viewCount', d, None),
                'comments': get_master_attr('items.0.statistics.commentCount', d, None),
                'created_time': None,
                'updated_at': str(datetime.datetime.utcnow())
            }
        else:
            result['msg'] = get_master_attr('error.errors.0.message', d,
                                            'Error from the YouTube API')
            if get_master_attr('error.code', d, None) == 400:
                if get_master_attr('error.errors.0.reason', d, None) == 'keyInvalid':
                    result['type'] = 'api_key'
                    result['msg'] = 'API key error'
                else:
                    result['type'] = 'link_id'
                    result['msg'] = 'Link id error'
            else:
                result['type'] = 'youtube_error'
    return result
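# For reference, the 'items.0.statistics.*' paths above follow the shape of the
# YouTube Data API v3 videos?part=statistics response, roughly (values made up):
#   {
#     "items": [
#       {
#         "id": "<video id>",
#         "statistics": {
#           "viewCount": "345678",
#           "likeCount": "1200",
#           "dislikeCount": "34",
#           "commentCount": "56"
#         }
#       }
#     ]
#   }
# so get_master_attr('items.0.statistics.viewCount', d, None) resolves to "345678".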
def process_links(self, connection, request_info):
    self.init_result()
    method = request_info['method']
    if method == 'GET':
        self.result['error'] = False
        link_id = get_master_attr('query_params.2', request_info, None)
        self.result['data'] = process_take_info_link(link_id)
        send_http_json_result(connection, self.result)
    if method == 'POST':
        # process insert data
        show_debug('Insert link data')
        data = request_info['data']
        show_debug('data body')
        print(data)
        show_debug('Processing save data ...')
        self.result = process_save_data_link(data)
        show_notify('Success!')
        print(self.result)
        send_http_json_result(connection, self.result)
    if method == 'PUT':
        link_id = get_master_attr('query_params.2', request_info, None)
        show_debug('Edit link data: %s' % link_id)
        data = request_info['data']
        print(data)
        show_debug('Processing ... ')
        if link_id:
            result = process_update_link(link_id, data)
            if result:
                self.result['msg'] = 'Updated'
                self.result['error'] = False
        send_http_json_result(connection, self.result)
    if method == 'DELETE':
        link_id = get_master_attr('query_params.2', request_info, None)
        show_debug('DELETE link data: %s' % link_id)
        show_debug('Processing ... ')
        if link_id:
            if process_delete_link(link_id):
                self.result['msg'] = 'Deleted'
                self.result['error'] = False
        send_http_json_result(connection, self.result)
def process_request(self, data):
    result = {'error': True, 'msg': None, 'data': None, 'ref': 'IG'}
    url = 'https://www.instagram.com/p/' + data['link_id']
    proxy = get_master_attr('proxy', data, None)
    s = requests.Session()
    if proxy:
        s.proxies = {"https": proxy, "http": proxy}
    try:
        response = s.get(url, timeout=10)
    except requests.ConnectionError as err:
        result['type'] = 'requests'
        result['msg'] = str(err)
    else:
        html = response.text
        # extract the embedded window._sharedData JSON blob
        regex = r"window._sharedData = {(.*)};</script>"
        matches = re.findall(regex, html, re.DOTALL)
        if matches:
            d = json.loads('{' + matches[0] + '}')
            result['error'] = False
            result['data'] = {
                'link_id': data['link_id'],
                'likes': get_master_attr(
                    'entry_data.PostPage.0.graphql.shortcode_media.edge_media_preview_like.count', d, None),
                'comments': get_master_attr(
                    'entry_data.PostPage.0.graphql.shortcode_media.edge_media_preview_comment.count', d, None),
                'created_time': get_master_attr(
                    'entry_data.PostPage.0.graphql.shortcode_media.taken_at_timestamp', d, None),
                'updated_at': str(datetime.datetime.utcnow()),
                'profile': {
                    'id': get_master_attr(
                        'entry_data.PostPage.0.graphql.shortcode_media.owner.id', d, None),
                    'username': get_master_attr(
                        'entry_data.PostPage.0.graphql.shortcode_media.owner.username', d, None),
                    'display_name': get_master_attr(
                        'entry_data.PostPage.0.graphql.shortcode_media.owner.full_name', d, None)
                }
            }
        else:
            result['msg'] = 'Could not extract post data from link'
            result['type'] = 'link_id'
    return result
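# The window._sharedData blob parsed above, trimmed to the paths this method
# reads, looks roughly like this (illustrative values):
#   {
#     "entry_data": {
#       "PostPage": [{
#         "graphql": {
#           "shortcode_media": {
#             "taken_at_timestamp": 1584000000,
#             "edge_media_preview_like": {"count": 120},
#             "edge_media_preview_comment": {"count": 8},
#             "owner": {"id": "123", "username": "someuser", "full_name": "Some User"}
#           }
#         }
#       }]
#     }
#   }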
def process_update_link(link_id, data):
    allow_keys = ['link_id', 'status', 'type']
    mongodb = MongodbClient.get_instance()
    link_collection = mongodb.get_link_collection()
    link = link_collection.find_one({'link_id': link_id})
    params = {}
    if link:
        # copy over only the whitelisted fields that are present in the payload
        for key in allow_keys:
            value = get_master_attr(key, data, None)
            if value is not None:
                params[key] = value
        return link_collection.update_one({'_id': link['_id']},
                                          {'$set': params})
    return None
def process_result_callback(link_id):
    link = client.get_link_collection().find_one({"link_id": link_id})
    if not link:
        print('Link not found')
        return None
    hook_url = get_master_attr('hook_url', link, None)
    if hook_url:
        data = get_data_hook(link_id, link)
        try:
            requests.post(hook_url, data)
        except requests.exceptions.ConnectionError as e1:
            show_warning(format(e1))
        except Exception as e:
            show_warning(format(e))
        show_debug('Hook request %s' % link_id)
        print(data)
def listen(self):
    s = create_server(ServerConfig.IP_ADDRESS.value, port, num_client)
    while True:
        try:
            connection, client_address = s.accept()
        except socket.error as err:
            print(err)
            continue
        data = b''
        connection.settimeout(0.5)
        show_text('====== NEW TASK =======')
        try:
            while True:
                try:
                    msg = connection.recv(1024)
                    if not msg:
                        break
                    data += msg
                    # stop reading once the blank line ending the HTTP headers arrives
                    matches = re.findall(r'\r\n\r\n$', msg.decode())
                    if len(matches) > 0:
                        break
                except socket.error:
                    break
        except Exception as e:
            print(e)
            break
        try:
            request_info = get_info_request(data.decode())
            action = get_master_attr('query_params.1', request_info, None)
            # dispatch the main action
            if action == 'attachments':
                self.process_attachment(connection, request_info)
            if action == 'links':
                self.process_links(connection, request_info)
            if action == 'crawler-links':
                self.process_crawler_links(connection, request_info)
        except Exception as e:
            show_warning(format(e))
            result = {"error": True, "msg": format(e)}
            send_http_json_result(connection, result)
        connection.close()
def process_response(self, result):
    show_debug('processing response ...')
    link_id = get_master_attr('data.link_id', result, None)
    # extract the user id from the '<user_id>_<post_id>' link id
    matches = re.findall(r'(.*)_(.*)', link_id)
    user_id = None
    if len(matches):
        user_id = matches[0][0]
    link = self.mongodb.get_link_collection().find_one({'link_id': link_id})
    collection_history = self.mongodb.get_link_history_collection()
    if link:
        item = {
            'profile': {
                'id': user_id,
            },
            'likes': result['data']['likes'],
            'comments': result['data']['comments'],
            'reactions': result['data']['reactions'],
            'shares': result['data']['shares'],
            'post_created_time': result['data']['created_time'],
            'updated_at': result['data']['updated_at']
        }
        # queue a screenshot of the Facebook post
        Selenium.get_instance().screen_post('FB', link_id)
        item['processing_screenshot'] = 1
        item['screenshot'] = None
        res = self.mongodb.get_link_collection().update_one(
            {'_id': link['_id']}, {'$set': item})
        item['link_id'] = result['data']['link_id']
        collection_history.insert_one(item)
        if res:
            return 1
        return 0
    return -1
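# Example: for a 'user_id_post_id' style id such as '1234567890_9876543210',
# re.findall(r'(.*)_(.*)', link_id) returns [('1234567890', '9876543210')],
# so matches[0][0] is the page/user id. Note the greedy match splits on the
# last underscore if the id contains more than one.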
def process_save_data_link(data):
    result = {"error": False, "msg": "Completed", 'data': []}
    mongodb = MongodbClient.get_instance()
    link_collection = mongodb.get_link_collection()
    items = get_master_attr('body', data, [])
    hook_url = get_master_attr('hook_url', data, None)
    for item in items:
        # format deadline (YYYYMMDD string -> datetime)
        matches = re.findall(r'(\d{4})(\d{2})(\d{2})', item['deadline'])
        if len(matches) > 0:
            item['deadline'] = datetime.datetime(int(matches[0][0]),
                                                 int(matches[0][1]),
                                                 int(matches[0][2]))
        else:
            item['deadline'] = datetime.datetime.utcnow()
        # format campaign start (YYYYMMDD string -> datetime, default today)
        matches = re.findall(r'(\d{4})(\d{2})(\d{2})',
                             get_master_attr('camp_start', item, ''))
        if len(matches) > 0:
            item['camp_start'] = datetime.datetime(int(matches[0][0]),
                                                   int(matches[0][1]),
                                                   int(matches[0][2]))
        else:
            item['camp_start'] = datetime.datetime(
                datetime.datetime.utcnow().year,
                datetime.datetime.utcnow().month,
                datetime.datetime.utcnow().day)
        # normalize timeline entries ('HH:MM' -> 'HH:00')
        timeline = get_master_attr('timeline', item, [])
        if len(timeline) > 0:
            count = 0
            for itime in timeline:
                matches = re.findall(r'(\d{2}):(\d{2})', itime)
                if len(matches) > 0:
                    timeline[count] = '%s:00' % matches[0][0]
                else:
                    timeline[count] = '00:00'
                count += 1
            item['timeline'] = timeline
        item['created_at'] = datetime.datetime.utcnow()
        item['updated_at'] = datetime.datetime.utcnow()
        item['status'] = 1
        item['hook_url'] = hook_url
        try:
            link_collection.insert_one(item)
            result['data'].append({
                'msg': 'Success',
                'error': False,
                'link_id': item['link_id']
            })
        except pymongo.errors.DuplicateKeyError:
            # the link already exists: replace its fields instead
            del item['_id']
            link_collection.update_one({'link_id': item['link_id']},
                                       {'$set': item})
            result['data'].append({
                'msg': 'Replace',
                'error': False,
                'link_id': item['link_id']
            })
        except Exception as e:
            result['data'].append({
                'msg': format(e),
                'error': True,
                'link_id': item['link_id']
            })
    return result
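# Illustrative request payload accepted by process_save_data_link (field names
# taken from the parsing above; values are made up). Dates arrive as YYYYMMDD
# strings and timeline entries are truncated to the hour ('09:30' -> '09:00'):
#   {
#     "hook_url": "https://example.com/hook",
#     "body": [
#       {
#         "link_id": "abc123",
#         "deadline": "20200131",
#         "camp_start": "20200101",
#         "timeline": ["09:30", "18:00"]
#       }
#     ]
#   }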
def process_request(self, data):
    result = {
        'error': True,
        'msg': None,
        'data': None,
        'ref': 'FB',
        'type': None
    }
    link_id = get_master_attr('link_id', data, '')
    token = get_master_attr('token', data, '')
    url = ('https://graph.facebook.com/' + link_id +
           '?fields=reactions.summary(true),comments.summary(true),shares,likes'
           '&access_token=' + token)
    proxy = get_master_attr('proxy', data, None)
    s = requests.Session()
    if proxy:
        s.proxies = {"https": proxy, "http": proxy}
    try:
        show_debug('Call request: %s' % url)
        response = s.get(url, timeout=10)
    except requests.ConnectionError as err:
        show_warning(format(err))
        result['type'] = 'requests'
        result['msg'] = str(err)
    else:
        d = response.json()
        if get_master_attr('error', d, None) is None:
            result['error'] = False
            result['data'] = {
                'link_id': data['link_id'],
                'likes': get_master_attr('likes.count', d, None),
                'shares': get_master_attr('shares.count', d, None),
                'comments': get_master_attr('comments.count', d, None),
                'reactions': get_master_attr('reactions.summary.total_count', d, None),
                'created_time': get_master_attr('created_time', d, None),
                'updated_at': str(datetime.datetime.utcnow())
            }
        else:
            show_warning('Error fetching the Facebook API')
            print(d)
            result['type'] = 'api_fb_error'
            result['msg'] = get_master_attr('error.message', d,
                                            'Error connecting to the Facebook API')
            code = get_master_attr('error.code', d, None)
            if code == 190:
                result['type'] = 'token'
            elif code == 100:
                result['type'] = 'link_id'
    return result
from Configs.enum import ServerConfig
from CrawlerLib.helper import get_sys_params, get_master_attr, print_header_log
from CrawlerLib.scheduletask_helper import start_schedule, job
from CrawlerLib.server import create_server
import socket
import json
import re
import sys
import threading
from CrawlerLib.servercommand_helper import process_save_data_link, send_http_json_result, \
    process_download_attachment, send_http_result, process_take_info_link, get_info_request, process_update_link, \
    process_delete_link
from CrawlerLib.show_notify import show_text, show_warning, show_notify, show_debug

print_header_log()
params = get_sys_params()
port = get_master_attr('port', params, None)
num_client = get_master_attr('num_client', params, 3)
if port is None:
    print('Field port is required')
    sys.exit(1)
if num_client is None:
    print('Field num_client is required')
    sys.exit(1)
port = int(port)
num_client = int(num_client)


class ServerCommand:
    result = {"error": True, "msg": "", "data": None}
def get_data_hook(link_id, link):
    link_type = get_master_attr('type', link, None)
    data = {
        'link_id': link_id,
        'user_id': get_master_attr('profile.id', link, None),
        'user_name': get_master_attr('profile.username', link, None),
        'user_display': get_master_attr('profile.display_name', link, None),
        'post_created_time': get_master_attr('post_created_time', link, None),
        'type': link_type,
        'screenshot': get_master_attr('screenshot', link, None)
    }
    if link_type == constant.TYPE_FB:
        data['reactions'] = get_master_attr('reactions', link, None)
        data['comments'] = get_master_attr('comments', link, None)
        data['shares'] = get_master_attr('shares', link, None)
    if link_type == constant.TYPE_INS:
        data['likes'] = get_master_attr('likes', link, None)
        data['comments'] = get_master_attr('comments', link, None)
    if link_type == constant.TYPE_YTB:
        data['views'] = get_master_attr('views', link, None)
        data['comments'] = get_master_attr('comments', link, None)
        data['likes'] = get_master_attr('likes', link, None)
        data['dislikes'] = get_master_attr('dislikes', link, None)
    return data
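# Illustrative hook payload produced for an Instagram link (constant.TYPE_INS);
# keys follow the code above, values are made up:
#   {
#     "link_id": "abc123",
#     "user_id": "123",
#     "user_name": "someuser",
#     "user_display": "Some User",
#     "post_created_time": 1584000000,
#     "type": "<constant.TYPE_INS>",
#     "screenshot": "<path or url>",
#     "likes": 120,
#     "comments": 8
#   }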