def process_thread_screenshot(self, post_type, post_id):
        try:
            # show_debug('Process take screenshot ...' + post_id)
            link = MongodbClient.get_instance().get_link_collection().find_one(
                {'link_id': post_id})
            if link:
                data = {'processing_screenshot': 0}
                screenshot = self.selenium_types[post_type].screen_post(
                    self, post_id)
                if screenshot:
                    data['screenshot'] = screenshot
                MongodbClient.get_instance().get_link_collection().update_one(
                    {'_id': link['_id']}, {'$set': data})

                # build the hook payload; prefer the freshly captured
                # screenshot, since `link` was fetched before the update
                data = {
                    'link_id': get_master_attr('link_id', link, None),
                    'type': get_master_attr('type', link, None),
                    'screenshot': screenshot if screenshot else get_master_attr('screenshot', link, None)
                }
                hook_url = get_master_attr('hook_url', link, None)
                if hook_url:
                    requests.post(hook_url, data)
            else:
                show_debug('NOT FOUND LINK')
        except Exception as e:
            show_warning('error code: #117228')
            show_warning(format(e))
    def listen(self):
        while True:
            try:
                data = _recev(self.client)
                if not data:
                    show_warning('No data received')
                    break
                if "action" in data:
                    if data['action'] == 'notify' and data[
                            'ref'] == 'subscribed':
                        show_notify("subscribe was successfully")

                    if data['action'] == 'assign':
                        show_text('== NEW TASK Assign ===')
                        show_debug(
                            'Received assign task for link %s' %
                            get_master_attr('params.link_id', data, None))
                        self.do_assign(data['params'])

                    if data['action'] == 'live':
                        _send(self.client, {'action': 'live', 'status': True})
            except ConnectionError as err:
                show_warning("Connection error: {0}".format(err))
            except Exception as err1:
                show_warning("Unexpected error: {0}".format(err1))
                self.client.close()
                break
Example #3
    def process_response(self, result):
        show_debug('processing response ...')
        link = self.mongodb.get_link_collection().find_one(
            {'link_id': result['data']['link_id']})
        collection_history = self.mongodb.get_link_history_collection()
        if link:
            item = {
                'profile': get_master_attr('data.profile', result, None),
                'likes': result['data']['likes'],
                'comments': result['data']['comments'],
                'post_created_time': result['data']['created_time'],
                'updated_at': result['data']['updated_at']
            }

            # screenshot
            Selenium.get_instance().screen_post('IG',
                                                result['data']['link_id'])
            item['processing_screenshot'] = 1
            item['screenshot'] = None

            res = self.mongodb.get_link_collection().update_one(
                {'_id': link['_id']}, {'$set': item})
            item['link_id'] = result['data']['link_id']
            collection_history.insert_one(item)
            if res:
                return 1
            return 0
        return -1
Example #4
    def process_crawler_links(self, connection, request_info):
        self.init_result()
        method = request_info['method']
        if method == 'POST':
            data = request_info['data']
            crawler_type = get_master_attr('type', data, None)
            if crawler_type == 'timeline':
                print_header_log()
                start_schedule()

            if crawler_type == 'links':

                def process_list_job(arr):
                    for j in arr:
                        job(j)

                def process_jobs(link_social):
                    print('Total links: ', len(link_social))
                    # split the links in half and crawl them on two worker
                    # threads; the HTTP response is sent once, below
                    half = int(len(link_social) / 2)
                    x = threading.Thread(target=process_list_job,
                                         args=(link_social[0:half], ))
                    y = threading.Thread(target=process_list_job,
                                         args=(link_social[half:], ))
                    x.start()
                    y.start()

                links = get_master_attr('links', data, [])
                crawlerparams = []
                for link in links:
                    info = process_take_info_link(link)
                    if info is not None:
                        crawlerparams.append({
                            'link_id': info['link_id'],
                            'type': info['type']
                        })
                process_jobs(crawlerparams)
                send_http_json_result(connection, {
                    'error': False,
                    'msg': 'Success'
                })
Example #5
    def process_request(self, data):
        result = {
            'error': True,
            'msg': None,
            'data': None,
            'ref': YoutubeLink.get_name()
        }
        url = 'https://www.googleapis.com/youtube/v3/videos?part=statistics&id=%s&key=%s' % (data['link_id'], ServerConfig.API_YTB_KEY.value)
        proxy = get_master_attr('proxy', data, None)
        s = requests.Session()
        if proxy:
            proxies = {
                "https": proxy,
                "http": proxy
            }
            s.proxies = proxies

        try:
            show_debug('Call request: %s' % url)
            response = s.get(url, timeout=10)
        except requests.ConnectionError as err:
            show_warning(format(err))
            result['type'] = 'requests'
            result['msg'] = str(err)
        except requests.HTTPError as err:
            show_warning(format(err))
            result['type'] = 'requests'
            result['msg'] = str(err)
        else:
            d = response.json()
            if 'error' not in d:
                result['error'] = False
                result['data'] = {
                    'link_id': data['link_id'],
                    'likes': get_master_attr('items.0.statistics.likeCount', d, None),
                    'dislikes': get_master_attr('items.0.statistics.dislikeCount', d, None),
                    'views': get_master_attr('items.0.statistics.viewCount', d, None),
                    'comments': get_master_attr('items.0.statistics.commentCount', d, None),
                    'created_time': None,
                    'updated_at': str(datetime.datetime.utcnow())
                }
            else:
                result['msg'] = get_master_attr('error.errors.0.message', d, 'Error from the YouTube API')
                if get_master_attr('error.code', d, None) == 400:
                    if get_master_attr('error.errors.0.reason', d, None) == 'keyInvalid':
                        result['type'] = 'api_key'
                        result['msg'] = 'API key error'
                    else:
                        result['type'] = 'link_id'
                        result['msg'] = 'Link ID error'

                else:
                    result['type'] = 'youtube_error'

        return result
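The statistics paths used above assume a standard videos.list response from the YouTube Data API v3, roughly shaped like the sketch below (values are made up; the API returns counts as strings, and newer responses may omit dislikeCount, in which case that path resolves to None):

# Illustrative response shape only; not taken from the original source.
sample_response = {
    'items': [{
        'id': 'abc123xyz',  # illustrative video id
        'statistics': {
            'viewCount': '1024',
            'likeCount': '99',
            'dislikeCount': '3',
            'commentCount': '12'
        }
    }]
}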
Example #6
    def process_links(self, connection, request_info):
        self.init_result()
        method = request_info['method']
        if method == 'GET':
            self.result['error'] = False
            link_id = get_master_attr('query_params.2', request_info, None)
            self.result['data'] = process_take_info_link(link_id)
            send_http_json_result(connection, self.result)

        if method == 'POST':
            # process insert data
            show_debug('Insert link data')
            data = request_info['data']
            show_debug('data body')
            print(data)
            show_debug('Processing save data ...')
            self.result = process_save_data_link(data)
            show_notify('Success!')
            print(self.result)
            send_http_json_result(connection, self.result)

        if method == 'PUT':
            link_id = get_master_attr('query_params.2', request_info, None)
            show_debug('Edit link data: %s' % link_id)
            data = request_info['data']
            print(data)
            show_debug('Processing ... ')
            if link_id:
                result = process_update_link(link_id, data)
                if result:
                    self.result['msg'] = 'Updated'
                self.result['error'] = False
            send_http_json_result(connection, self.result)

        if method == 'DELETE':
            link_id = get_master_attr('query_params.2', request_info, None)
            show_debug('DELETE link data: %s' % link_id)
            show_debug('Processing ... ')
            if link_id:
                if process_delete_link(link_id):
                    self.result['msg'] = 'Deleted'
                self.result['error'] = False
            send_http_json_result(connection, self.result)
Example #7
    def process_request(self, data):
        result = {'error': True, 'msg': None, 'data': None, 'ref': 'IG'}
        url = 'https://www.instagram.com/p/' + data['link_id']

        proxy = get_master_attr('proxy', data, None)
        s = requests.Session()
        if proxy:
            proxies = {"https": proxy, "http": proxy}
            s.proxies = proxies

        try:
            response = s.get(url, timeout=10)
        except requests.ConnectionError as err:
            result['type'] = 'requests'
            result['msg'] = str(err)
        else:
            html = response.text
            regex = r"window._sharedData = {(.*)};</script>"
            matches = re.findall(regex, html, re.DOTALL)
            if matches:
                d = json.loads('{' + matches[0] + '}')
                base = 'entry_data.PostPage.0.graphql.shortcode_media'
                result['error'] = False
                result['data'] = {
                    'link_id': data['link_id'],
                    'likes': get_master_attr(base + '.edge_media_preview_like.count', d, None),
                    'comments': get_master_attr(base + '.edge_media_preview_comment.count', d, None),
                    'created_time': get_master_attr(base + '.taken_at_timestamp', d, None),
                    'updated_at': str(datetime.datetime.utcnow()),
                    'profile': {
                        'id': get_master_attr(base + '.owner.id', d, None),
                        'username': get_master_attr(base + '.owner.username', d, None),
                        'display_name': get_master_attr(base + '.owner.full_name', d, None)
                    }
                }
            else:
                result['msg'] = 'Could not detect link'
                result['type'] = 'link_id'
        return result
Example #8
def process_update_link(link_id, data):
    allow_keys = ['link_id', 'status', 'type']
    mongodb = MongodbClient.get_instance()
    link_collection = mongodb.get_link_collection()
    link = link_collection.find_one({'link_id': link_id})
    params = {}
    if link:
        for key in allow_keys:
            value = get_master_attr(key, data, None)
            if value is not None:
                params[key] = value

        return link_collection.update_one({'_id': link['_id']}, {"$set": params})

    return None
Example #9
def process_result_callback(link_id):
    link = client.get_link_collection().find_one({"link_id": link_id})
    if not link:
        print('Link not found')
        return None
    hook_url = get_master_attr('hook_url', link, None)
    if hook_url:
        data = get_data_hook(link_id, link)
        try:
            requests.post(hook_url, data)
        except requests.exceptions.ConnectionError as e1:
            show_warning(format(e1))
        except Exception as e:
            show_warning(format(e))
        show_debug('Hook request %s' % link_id)
        print(data)
Example #10
    def listen(self):
        s = create_server(ServerConfig.IP_ADDRESS.value, port, num_client)
        while True:
            try:
                connection, client_address = s.accept()
                data = b''
                connection.settimeout(0.5)
                show_text('====== NEW TASK =======')
                try:
                    while True:
                        try:
                            msg = connection.recv(1024)
                            if not msg:
                                break
                            data += msg
                            # stop reading once the blank line ending the HTTP headers arrives
                            matches = re.findall(r'\r\n\r\n$', msg.decode())
                            if len(matches) > 0:
                                break
                        except socket.error:
                            break
                        except Exception as e:
                            print(e)
                            break
                    request_info = get_info_request(data.decode())
                    action = get_master_attr('query_params.1', request_info,
                                             None)

                    # process main action
                    if action == 'attachments':
                        self.process_attachment(connection, request_info)

                    if action == 'links':
                        self.process_links(connection, request_info)

                    if action == 'crawler-links':
                        self.process_crawler_links(connection, request_info)

                except Exception as e:
                    show_warning(format(e))
                    result = {"error": True, "msg": format(e)}
                    send_http_json_result(connection, result)
                connection.close()
            except socket.error as err:
                print(err)
    def process_response(self, result):
        show_debug('processing response ...')
        link_id = get_master_attr('data.link_id', result, None)

        # get user id
        matches = re.findall(r'(.*)_(.*)', link_id)
        user_id = None
        if len(matches):
            user_id = matches[0][0]

        link = self.mongodb.get_link_collection().find_one({'link_id': link_id})
        collection_history = self.mongodb.get_link_history_collection()
        if link:
            item = {
                'profile': {
                    'id': user_id,
                },
                'likes': result['data']['likes'],
                'comments': result['data']['comments'],
                'reactions': result['data']['reactions'],
                'shares': result['data']['shares'],
                'post_created_time': result['data']['created_time'],
                'updated_at': result['data']['updated_at']
            }

            Selenium.get_instance().screen_post('FB', link_id)
            item['processing_screenshot'] = 1
            item['screenshot'] = None

            res = self.mongodb.get_link_collection().update_one(
                {'_id': link['_id']}, {'$set': item})
            item['link_id'] = result['data']['link_id']
            collection_history.insert_one(item)

            if res:
                return 1
            return 0
        return -1
Example #12
def process_save_data_link(data):
    result = {"error": False, "msg": "Completed", 'data': []}
    mongodb = MongodbClient.get_instance()
    link_collection = mongodb.get_link_collection()

    items = get_master_attr('body', data, [])
    hook_url = get_master_attr('hook_url', data, None)
    for item in items:
        # format deadline
        matches = re.findall(r'(\d{4})(\d{2})(\d{2})', item['deadline'])
        if len(matches) > 0:
            item['deadline'] = datetime.datetime(int(matches[0][0]),
                                                 int(matches[0][1]),
                                                 int(matches[0][2]))
        else:
            item['deadline'] = datetime.datetime.utcnow()

        # format deadline start
        matches = re.findall(r'(\d{4})(\d{2})(\d{2})',
                             get_master_attr('camp_start', item, ''))
        if len(matches) > 0:
            item['camp_start'] = datetime.datetime(int(matches[0][0]),
                                                   int(matches[0][1]),
                                                   int(matches[0][2]))
        else:
            item['camp_start'] = datetime.datetime(
                datetime.datetime.utcnow().year,
                datetime.datetime.utcnow().month,
                datetime.datetime.utcnow().day)

        # format timeline
        timeline = get_master_attr('timeline', item, [])
        if len(timeline) > 0:
            count = 0
            for itime in timeline:
                matches = re.findall(r'(\d{2}):(\d{2})', itime)
                if len(matches) > 0:
                    timeline[count] = '%s:00' % matches[0][0]
                else:
                    timeline[count] = '00:00'
                count += 1
        item['timeline'] = timeline
        item['created_at'] = datetime.datetime.utcnow()
        item['updated_at'] = datetime.datetime.utcnow()
        # item['deadline'] = datetime.datetime.utcnow()
        item['status'] = 1
        item['hook_url'] = hook_url
        try:
            link_collection.insert_one(item)
            result['data'].append({
                'msg': 'Success',
                'error': False,
                'link_id': item['link_id']
            })
        except pymongo.errors.DuplicateKeyError:
            # the link already exists: update it in place instead of inserting
            item.pop('_id', None)
            link_collection.update_one({'link_id': item['link_id']},
                                       {'$set': item})
            result['data'].append({
                'msg': 'Replace',
                'error': False,
                'link_id': item['link_id']
            })
        except Exception as e:
            result['data'].append({
                'msg': format(e),
                'error': True,
                'link_id': item['link_id']
            })
    return result
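Judging from the parsing above, the payload handed to process_save_data_link would look roughly like the sketch below; every value here is illustrative rather than taken from the original source:

# Hypothetical request body for process_save_data_link.
example_payload = {
    'hook_url': 'http://example.com/hooks/links',  # illustrative callback URL
    'body': [{
        'link_id': 'abc123',             # illustrative post id
        'type': 'FB',
        'deadline': '20240131',          # parsed as YYYYMMDD
        'camp_start': '20240101',        # parsed as YYYYMMDD; defaults to today if absent
        'timeline': ['09:30', '18:15']   # each entry is normalised to 'HH:00'
    }]
}

result = process_save_data_link(example_payload)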
    def process_request(self, data):
        result = {
            'error': True,
            'msg': None,
            'data': None,
            'ref': 'FB',
            'type': None
        }

        link_id = get_master_attr('link_id', data, '')
        token = get_master_attr('token', data, '')
        url = 'https://graph.facebook.com/' + link_id + '?fields=reactions.summary(true),comments.summary(true),shares,likes&access_token=' + token
        proxy = get_master_attr('proxy', data, None)
        s = requests.Session()
        if proxy:
            proxies = {
                "https": proxy,
                "http": proxy
            }
            s.proxies = proxies

        try:
            show_debug('Call request: %s' % url)
            response = s.get(url, timeout=10)
        except requests.ConnectionError as err:
            show_warning(format(err))
            result['type'] = 'requests'
            result['msg'] = str(err)
        else:
            d = response.json()
            if get_master_attr('error', d, None) is None:
                result['error'] = False
                result['data'] = {
                    'link_id': data['link_id'],
                    'likes': get_master_attr('likes.count', d, None),
                    'shares': get_master_attr('shares.count', d, None),
                    'comments': get_master_attr('comments.count', d, None),
                    'reactions': get_master_attr('reactions.summary.total_count', d, None),
                    'created_time': get_master_attr('created_time', d, None),
                    'updated_at': str(datetime.datetime.utcnow())
                }
            else:
                show_warning('Error fetching the Facebook API')
                print(d)
                result['type'] = 'api_fb_error'
                result['msg'] = get_master_attr('error.message', d, 'Error connecting to the Facebook API')
                code = get_master_attr('error.code', d, None)
                if code == 190:
                    result['type'] = 'token'
                elif code == 100:
                    result['type'] = 'link_id'
        return result
Example #14
from Configs.enum import ServerConfig
from CrawlerLib.helper import get_sys_params, get_master_attr, print_header_log
from CrawlerLib.scheduletask_helper import start_schedule, job
from CrawlerLib.server import create_server
import socket
import json
import re
from CrawlerLib.servercommand_helper import process_save_data_link, send_http_json_result, \
    process_download_attachment, send_http_result, process_take_info_link, get_info_request, process_update_link, \
    process_delete_link
from CrawlerLib.show_notify import show_text, show_warning, show_notify, show_debug

print_header_log()

params = get_sys_params()
port = get_master_attr('port', params, None)
num_client = get_master_attr('num_client', params, 3)

if port is None:
    raise SystemExit('Field port is required')

if num_client is None:
    raise SystemExit('Field num_client is required')

port = int(port)
num_client = int(num_client)


class ServerCommand:
    result = {"error": True, "msg": "", "data": None}
Example #15
def get_data_hook(link_id, link):
    link_type = get_master_attr('type', link, None)
    data = {
        'link_id': link_id,
        'user_id': get_master_attr('profile.id', link, None),
        'user_name': get_master_attr('profile.username', link, None),
        'user_display': get_master_attr('profile.display_name', link, None),
        'post_created_time': get_master_attr('post_created_time', link, None),
        'type': link_type,
        'screenshot': get_master_attr('screenshot', link, None)
    }

    if link_type == constant.TYPE_FB:
        data['reactions'] = get_master_attr('reactions', link, None)
        data['comments'] = get_master_attr('comments', link, None)
        data['shares'] = get_master_attr('shares', link, None)

    if link_type == constant.TYPE_INS:
        data['likes'] = get_master_attr('likes', link, None)
        data['comments'] = get_master_attr('comments', link, None)

    if link_type == constant.TYPE_YTB:
        data['views'] = get_master_attr('views', link, None)
        data['comments'] = get_master_attr('comments', link, None)
        data['likes'] = get_master_attr('likes', link, None)
        data['dislikes'] = get_master_attr('dislikes', link, None)

    return data
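Tying this together with process_result_callback above: for a Facebook link (assuming constant.TYPE_FB matches the 'FB' ref used by the crawlers), the dict posted to hook_url would look roughly like this, with illustrative values:

# Hypothetical hook payload for a Facebook link; all values are made up.
hook_payload = {
    'link_id': '1234567890_987654321',   # illustrative FB post id
    'user_id': '1234567890',
    'user_name': None,                   # the FB crawler only stores the profile id
    'user_display': None,
    'post_created_time': '2024-01-01T00:00:00+0000',
    'type': 'FB',
    'screenshot': None,                  # or the stored screenshot reference
    'reactions': 120,
    'comments': 15,
    'shares': 4
}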