# Beispiel (Example) #1 — score: 0
    def process_thread_screenshot(self, post_type, post_id):
        """Capture a screenshot for the post *post_id* and notify its hook.

        Looks up the link document by ``link_id``, captures a screenshot
        via the selenium handler registered for *post_type*, stores it on
        the document (clearing the ``processing_screenshot`` flag), then
        POSTs a summary payload to the link's ``hook_url``.

        Any failure is logged with error code #117228 rather than raised.
        """
        try:
            link = MongodbClient.get_instance().get_link_collection().find_one(
                {'link_id': post_id})
            if link:
                data = {'processing_screenshot': 0}
                screenshot = self.selenium_types[post_type].screen_post(
                    self, post_id)
                if screenshot:
                    data['screenshot'] = screenshot
                MongodbClient.get_instance().get_link_collection().update_one(
                    {'_id': link['_id']}, {'$set': data})

                # BUGFIX: report the screenshot captured just now, not the
                # stale value from the document fetched before the update.
                payload = {
                    'link_id': get_master_attr('link_id', link, None),
                    'type': get_master_attr('type', link, None),
                    'screenshot': screenshot or get_master_attr(
                        'screenshot', link, None)
                }
                # Guard against documents without a hook_url (previously a
                # raw link['hook_url'] access that raised KeyError).
                hook_url = get_master_attr('hook_url', link, None)
                if hook_url:
                    requests.post(hook_url, payload)
            else:
                show_debug('NOT FOUND LINK')
        except Exception as e:
            print('error code: #117228')
            print(format(e))
 def exec(self, args):
     """Dump link documents for inspection.

     When 'link_id' is present in *args*, pretty-print that single link
     plus its history entries; otherwise pretty-print every link.
     """
     mongo = MongodbClient.get_instance()
     if 'link_id' not in args:
         for document in mongo.get_link_collection().find():
             pprint.pprint(document)
         return None
     query = {'link_id': args['link_id']}
     pprint.pprint(mongo.get_link_collection().find_one(query))
     print('histories:')
     history_cursor = mongo.get_link_history_collection().find(query)
     pprint.pprint(list(history_cursor))
     return None
 def exec(self, argv):
     """Reset the 'links' collection.

     Drops and recreates 'links', attaches a $jsonSchema validator that
     requires a string link_id and a type in {FB, IG, YT}, and adds a
     unique text index on link_id.
     """
     db = MongodbClient.get_instance().get_crawler_db()
     #  drop database
     db.links.drop()
     db.create_collection('links')
     print('drop database')
     # init database
     data_command = {
         "collMod": "links",
         "validator": {
             "$jsonSchema": {
                 "bsonType": "object",
                 "required": ["link_id", "type"],
                 "properties": {
                     "type": {
                         "enum": ["FB", "IG", "YT"],
                         # BUGFIX: description previously listed "IN",
                         # which does not match the enum value "IG".
                         "description":
                         "can only be one of enum values [FB, IG, YT] and is required"
                     },
                     "link_id": {
                         "bsonType": "string",
                         "description": "must be a string and required"
                     }
                 }
             }
         }
     }
     db.command(data_command)
     # link_id must be unique; the text index supports text search on it.
     db['links'].create_index([('link_id', pymongo.TEXT)], unique=True)
     return None
# Beispiel (Example) #4 — score: 0
def process_take_info_link(link_id):
    """Return the link document matching *link_id*, or every link as a list
    when *link_id* is None."""
    collection = MongodbClient.get_instance().get_link_collection()
    if link_id is not None:
        return collection.find_one({"link_id": link_id})
    return list(collection.find())
# Beispiel (Example) #5 — score: 0
def process_update_link(link_id, data):
    """Update whitelisted fields of the link identified by *link_id*.

    Only 'link_id', 'status' and 'type' values present in *data* are
    applied. Returns the pymongo update result, or None when no document
    matches *link_id*.
    """
    allow_keys = ['link_id', 'status', 'type']
    link_collection = MongodbClient.get_instance().get_link_collection()
    link = link_collection.find_one({'link_id': link_id})
    if link is None:
        return None

    params = {}
    for key in allow_keys:
        value = get_master_attr(key, data, None)
        if value is not None:
            params[key] = value

    # Collection.update() was removed in pymongo 4; update_one is the
    # supported single-document equivalent (matches the style used by the
    # other update calls in this project).
    return link_collection.update_one({'_id': link['_id']}, {'$set': params})
    def exec(self, args):
        """Seed the links collection with three demo links (FB/IG/YT).

        Drops the collection, then inserts one sample link per platform,
        each with a 'loop' schedule of HH:MM strings spread over the next
        few minutes. Returns the list of inserted _ids.
        """

        def _schedule(minute_offsets):
            # Build 'HH:MM' strings offset from "now" by the given minutes.
            now = time.time()
            return [
                datetime.fromtimestamp(now + 60 * m).strftime('%H:%M')
                for m in minute_offsets
            ]

        data_links = [
            {
                "link_id": "100003803082906_1509228895880532",
                "type": "fb",
                "loop": _schedule([0, 1, 3, 5, 7, 9, 11, 13, 15]),
            },
            {
                "link_id": "BqkRJwMFtMb",
                "type": "ins",
                "loop": _schedule([0, 2, 4, 6, 8, 10, 12, 14]),
            },
            {
                "link_id": "j8U06veqxdU",
                "type": "ytb",
                "loop": _schedule([0, 2, 4, 6, 8, 10, 12, 14]),
            },
        ]
        link_collection = MongodbClient.get_instance().get_link_collection()
        link_collection.drop()
        # Collection.insert() was removed in pymongo 4; insert_one()'s
        # .inserted_id preserves the original return value (the new _id).
        return [
            link_collection.insert_one(link).inserted_id
            for link in data_links
        ]
# Beispiel (Example) #7 — score: 0
def _parse_yyyymmdd(value, default):
    """Parse the first YYYYMMDD group in *value* into a datetime;
    return *default* when no such group exists."""
    matches = re.findall(r'(\d{4})(\d{2})(\d{2})', value)
    if matches:
        year, month, day = matches[0]
        return datetime.datetime(int(year), int(month), int(day))
    return default


def _normalize_timeline(timeline):
    """Coerce each 'HH:MM' entry to the top of its hour ('HH:00');
    unparseable entries become '00:00'."""
    normalized = []
    for entry in timeline:
        matches = re.findall(r'(\d{2}):(\d{2})', entry)
        normalized.append(('%s:00' % matches[0][0]) if matches else '00:00')
    return normalized


def process_save_data_link(data):
    """Normalize and upsert the link items in data['body'].

    Each item gets parsed deadline/camp_start dates, a normalized
    timeline, created/updated timestamps, status=1 and the shared
    hook_url. Items whose link_id already exists are replaced via $set.
    Returns a report dict with one entry per item under 'data'.
    """
    result = {"error": False, "msg": "Completed", 'data': []}
    mongodb = MongodbClient.get_instance()
    link_collection = mongodb.get_link_collection()

    items = get_master_attr('body', data, [])
    hook_url = get_master_attr('hook_url', data, None)
    for item in items:
        now = datetime.datetime.utcnow()
        # Missing/invalid deadline falls back to "now"; camp_start falls
        # back to today's midnight. (Previously item['deadline'] raised
        # KeyError when the key was absent.)
        item['deadline'] = _parse_yyyymmdd(
            get_master_attr('deadline', item, ''), now)
        item['camp_start'] = _parse_yyyymmdd(
            get_master_attr('camp_start', item, ''),
            datetime.datetime(now.year, now.month, now.day))
        item['timeline'] = _normalize_timeline(
            get_master_attr('timeline', item, []))
        item['created_at'] = now
        item['updated_at'] = now
        item['status'] = 1
        item['hook_url'] = hook_url
        try:
            # insert()/update() were removed in pymongo 4.
            link_collection.insert_one(item)
            # BUGFIX: record 'Success' only after the insert succeeded.
            # Previously the entry was appended before inserting, so a
            # duplicate produced both a 'Success' and a 'Replace' entry.
            result['data'].append({
                'msg': 'Success',
                'error': False,
                'link_id': item['link_id']
            })
        except pymongo.errors.DuplicateKeyError:
            # insert_one mutated item with the rejected _id; drop it
            # before $set-ing the remaining fields onto the existing doc.
            del item['_id']
            link_collection.update_one({'link_id': item['link_id']},
                                       {'$set': item})
            result['data'].append({
                'msg': 'Replace',
                'error': False,
                'link_id': item['link_id']
            })
        except Exception as e:
            result['data'].append({
                'msg': format(e),
                'error': True,
                'link_id': item['link_id']
            })
    return result
# Beispiel (Example) #8 — score: 0
def process_delete_link(link_id):
    """Delete the link document matching *link_id*; return the DeleteResult."""
    collection = MongodbClient.get_instance().get_link_collection()
    return collection.delete_one({'link_id': link_id})
 def __init__(self):
     # Cache the shared Mongo client singleton for later use by this object.
     self.mongodb = MongodbClient.get_instance()
def init_data(arr):
    """Insert each dict in *arr* into the link collection with default
    schedule fields.

    NOTE: mutates the passed-in dicts in place (sets 'timeline',
    'camp_start' and 'deadline'), matching the original behavior.
    """
    # Hoist the client lookup out of the loop — it is loop-invariant.
    link_collection = MongodbClient.get_instance().get_link_collection()
    for data in arr:
        data['timeline'] = '00:00'
        data['camp_start'] = datetime.datetime.now()
        data['deadline'] = datetime.datetime.now()
        # Collection.insert() was removed in pymongo 4; insert_one is the
        # single-document replacement.
        link_collection.insert_one(data)
# Beispiel (Example) #11 — score: 0
import sched
import time
import datetime
from Background.masterclient import assign_task
from Configs import constant
from CrawlerLib.Pymongo import MongodbClient
from CrawlerLib.helper import get_utc_time, get_master_attr
from CrawlerLib.show_notify import show_warning, show_debug
import requests
import threading

client = MongodbClient.get_instance()


def job(data):
    """Scheduler callback: dispatch *data* as a crawl task, then run the
    result callback for its link_id.

    NOTE(review): assign_task / process_result_callback are defined
    elsewhere; assumes data always carries a 'link_id' key — confirm
    against the scheduler that enqueues these jobs.
    """
    assign_task(data)
    process_result_callback(data['link_id'])


def get_data_hook(link_id, link):
    link_type = get_master_attr('type', link, None)
    data = {
        'link_id': link_id,
        'user_id': get_master_attr('profile.id', link, None),
        'user_name': get_master_attr('profile.username', link, None),
        'user_display': get_master_attr('profile.display_name', link, None),
        'post_created_time': get_master_attr('post_created_time', link, None),
        'type': link_type,
        'screenshot': get_master_attr('screenshot', link, None)
    }