Esempio n. 1
0
    def before_post(self):
        """Authenticate a user by mobile/password and record the login attempt.

        Looks up ``users`` for a document matching the submitted mobile and
        the MD5 of the submitted password. For a confirmed account it issues
        a token, attaches the date of the previous login and places the user
        document (minus credential/bookkeeping fields) in
        ``self.output['data']['item']``. Whatever the outcome, an entry is
        then written to ``user_logins`` and ``self.allow_action`` is set to
        ``False``.

        Returns:
            None
        """
        self.Print('%s fired' % inspect.stack()[0][3], Colors.GRAY)
        try:
            col_users = db()['users']
            col_user_logins = db()['user_logins']
            user_info = col_users.find_one({
                'mobile': self.params['mobile'],
                'password': create_md5(self.params['password']),
            })
            if user_info is None:
                self.set_output('user', 'login_failed')
            else:
                self.user_role = user_info['role']
                user_info = self.after_get_one(user_info)
                self.user_id = user_info['id']
                if user_info['confirmed']:
                    self.set_output('public_operations', 'successful')
                    self.token = encode_token({
                        'user_id': self.user_id,
                        'role': self.user_role
                    }).decode('ascii')
                    self.output['token'] = self.token
                    # Fetch only the newest login record; Cursor.count() is a
                    # deprecated legacy call, so ask for one sorted document.
                    last_login = col_user_logins.find_one(
                        {'mobile': self.params['mobile']},
                        sort=[('date', -1)])
                    if last_login is not None:
                        user_info['last_login'] = str(last_login['date'])
                    else:
                        user_info['last_login'] = ''
                        user_info['first_login'] = str(datetime.now())
                    # Never send credentials or internal fields to the client.
                    for field in ('last_update', 'password_pure', 'password',
                                  '_id'):
                        user_info.pop(field, None)
                    self.output['data']['item'] = user_info
                else:
                    self.set_output('user', 'inactive')
        except Exception:
            PrintException()
            self.set_output('public_operations', 'failed')

        try:
            # NOTE(review): self.user_id / self.status / self.note_id are not
            # assigned on every path above; presumably they are initialised
            # elsewhere (e.g. by set_output) - confirm, otherwise failed
            # logins are silently not recorded.
            db()['user_logins'].insert_one({
                'user_id': self.user_id,
                'mobile': self.params.get('mobile'),
                'status': self.status,
                'date': datetime.now(),
                'notes': self.note_id
            })
        except Exception:
            PrintException()
        self.allow_action = False
Esempio n. 2
0
    def before_post(self):
        """Validate and normalise the parameters of a task before creation.

        The request is rejected with ``tasks/wrong_params`` when any submitted
        key is not listed in ``self.inputs['post']``. Otherwise the optional
        ``reminder_date``, ``from_date`` and ``to_date`` strings are parsed
        (format ``%Y-%m-%d %H:%M:%S``) into ``datetime`` objects in place and
        ``is_done`` / ``is_favorite`` are initialised to ``False``.

        Returns:
            bool: ``True`` when the parameters are acceptable, ``False`` on an
            unknown parameter or a parsing error.
        """
        if any(item not in self.inputs['post'] for item in self.params):
            self.set_output('tasks', 'wrong_params')
            return False
        try:
            for field in ('reminder_date', 'from_date', 'to_date'):
                if field in self.params:
                    self.params[field] = datetime.strptime(
                        self.params[field], "%Y-%m-%d %H:%M:%S")

            self.params['is_done'] = False
            self.params['is_favorite'] = False
        except Exception:
            PrintException()
            return False
        return True
Esempio n. 3
0
def log(type, page_url, selector, data, error, source_id, engine_instance_id):
    """Queue an error-log document for a later bulk write.

    Appends an ``InsertOne`` operation describing the error to the
    module-level ``log_list``; nothing is written to the database here.

    Args:
        type: Log category stored under ``'type'``.
        page_url: URL being processed when the error occurred.
        selector: Selector that was being applied.
        data: Extra payload stored under ``'data'``.
        error: Error description stored under ``'error'``.
        source_id: Identifier of the source.
        engine_instance_id: Identifier of the running engine; stored as a
            string.
    """
    global log_list
    try:
        log_list.append(
            InsertOne({
                'engine_instance_id': str(engine_instance_id),
                'type': type,
                'source_id': source_id,
                'page_url': page_url,
                'selector': selector,
                'data': data,
                'date': datetime.now(),
                'error': error,
            }))
    except Exception:
        PrintException()
Esempio n. 4
0
 def before_post(self):
     """Re-send the activation code to a registered but unconfirmed user.

     Returns:
         bool: ``True`` after the SMS has been sent, or when the stored
         ``confirmed`` value is neither ``True`` nor ``False`` (in which case
         ``self.allow_action`` is set to ``False``). ``False`` when the user
         does not exist, is already active, or an error occurred.
     """
     try:
         col_users = db()['users']
         user = col_users.find_one({'mobile': self.params['mobile']}, {
             'confirmed': 1,
             'mobile': 1,
             'activation_code': 1
         })
         if user is None:
             self.set_output('user', 'user_not_exists')
             return False
         if user['confirmed'] is False:
             # NOTE(review): under Python 3 the encoded value is ``bytes`` and
             # would be rendered as b'...' inside the message; the other SMS
             # call sites format the code without encoding - confirm.
             send_sms(
                 sms['users']['registration_successful'][self.locale] %
                 user['activation_code'].encode('utf-8'), user['mobile'])
             self.set_output('user', 'send_sms')
             return True
         if user['confirmed'] is True:
             self.set_output('user', 'already_active')
             return False
         self.allow_action = False
         return True
     except Exception:
         PrintException()
         return False
Esempio n. 5
0
    def before_post(self):
        """Confirm a user account from a mobile number and activation code.

        Sets ``confirmed`` to ``True`` on the ``users`` document matching both
        the submitted mobile and activation code, and reports
        ``user/activate_account`` when exactly one document was modified,
        ``user/mobile_not_activated`` otherwise. ``self.allow_action`` is set
        to ``False`` on the non-error path.

        Returns:
            bool: ``True`` unless an exception occurred.
        """
        self.Print('%s fired' % inspect.stack()[0][3], Colors.GRAY)
        try:
            col_users = self.db['users']
            update_result = col_users.update_one(
                {
                    'mobile': self.params['mobile'],
                    'activation_code': self.params['activation_code']
                }, {'$set': {
                    'confirmed': True
                }})
            # An already-confirmed account matches but is not modified, so it
            # is reported as not activated, exactly as before.
            if update_result.modified_count == 1:
                self.set_output('user', 'activate_account')
            else:
                self.set_output('user', 'mobile_not_activated')

            self.allow_action = False
            return True
        except Exception:
            PrintException()
            return False
Esempio n. 6
0
    def before_post(self):
        """Validate a registration request and prepare the user document.

        Rejects the request when the mobile number (or the e-mail address,
        when one is supplied) is already present in ``users``. Otherwise
        fills in ``activation_code``, ``confirmed``, ``role`` and the password
        fields on ``self.params``.

        Returns:
            bool: ``True`` when registration may proceed, ``False`` on a
            duplicate or on any error.
        """
        try:
            self.method = 'users'
            col_users = db()['users']
            if col_users.count_documents(
                    {'mobile': self.params['mobile']}) > 0:
                self.set_output('user', 'mobile_exists')
                return False
            if self.params.get('email') is not None:
                if col_users.count_documents(
                        {'email': self.params['email']}) > 0:
                    self.set_output('user', 'email_exists')
                    return False

            self.params['activation_code'] = random_digits()
            self.params['confirmed'] = False
            self.params['role'] = 'user'
            # NOTE(review): the plaintext password is persisted alongside an
            # MD5 digest; both are weak for credential storage - flagged, not
            # changed, because other handlers read these fields.
            self.params['password_pure'] = self.params['password']
            self.params['password'] = create_md5(self.params['password'])
        except Exception:
            PrintException()
            return False
        return True
Esempio n. 7
0
def get_page(url):
    """Download ``url`` and return it parsed with BeautifulSoup.

    The request is first made with a browser-like ``User-Agent`` header and,
    if that raises, retried once without custom headers. When both attempts
    fail, an entry is appended to the module-level ``logs_list``,
    ``error_count`` is incremented and an empty string is returned.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``BeautifulSoup`` document on success, ``''`` when the page could
        not be fetched.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    global logs_list
    global error_count
    result = None
    try:
        result = requests.get(url, headers=headers, verify=False)
    except Exception:
        try:
            result = requests.get(url, verify=False)
        except Exception:
            print('error in get page')
            logs_list.append(
                {'type': 'read_url', 'page_url': url, 'selector': '',
                 'data': {},
                 'error': PrintException(),
                 'source_link_id': '',
                 'engine_instance_id': engine_instance_id,
                 'module': 'link_grabber',
                 'date':datetime.now()})
            error_count += 1
    # Compare against None explicitly: a Response object is falsy for 4xx/5xx
    # statuses, and those pages were parsed before as well.
    if result is None:
        return ''
    return BeautifulSoup(result.text, 'html.parser')
Esempio n. 8
0
 def before_post(self):
     """Reset a user's password using the mobile number and activation code.

     Updates the ``users`` document matching both the submitted mobile and
     activation code with the new password. Reports
     ``user/password_reset_failed`` when no document matched.

     Returns:
         bool: ``True`` unless an exception occurred; ``self.allow_action``
         is set to ``False`` on that path.
     """
     self.Print('%s fired' % inspect.stack()[0][3], Colors.GRAY)
     try:
         col_users = db()['users']
         # NOTE(review): the plaintext password is stored next to its MD5
         # digest - flagged, not changed, because other handlers use it.
         new_values = {
             'password': create_md5(self.params['new_password']),
             'password_pure': self.params['new_password']
         }
         result = col_users.find_one_and_update(
             {
                 'mobile': self.params['mobile'],
                 'activation_code': self.params['activation_code']
             },
             {'$set': new_values})
         if result is None:
             self.set_output('user', 'password_reset_failed')
         else:
             self.set_output('public_operations', 'successful')
     except Exception:
         PrintException()
         self.set_output('public_operations', 'failed')
         return False
     self.allow_action = False
     return True
Esempio n. 9
0
 def before_get(self):
     """Return one task by id, or every task of the current user.

     With an ``id`` parameter the matching task is placed in
     ``self.output['data']['item']``; without it, the user's tasks (newest
     ``create_date`` first) are placed in ``self.output['data']['list']``.
     An unknown id is reported as ``public_operations/record_not_found``.

     Returns:
         bool: ``True`` on success (``self.allow_action`` is then ``False``),
         ``False`` when the task does not exist or an error occurred.
     """

     def _serialize(task):
         # Expose the ObjectId as a string ``id`` and stringify the dates.
         task['id'] = str(task.pop('_id'))
         # The dates are optional when a task is created, so convert them
         # only when present.
         for key in ('from_date', 'to_date'):
             if key in task:
                 task[key] = str(task[key])
         # Internal bookkeeping fields are not returned to the client.
         for key in ('create_date', 'last_update', 'user_id'):
             task.pop(key, None)
         return task

     try:
         col_tasks = db()['tasks']
         if 'id' in self.params:
             task = col_tasks.find_one({'_id': ObjectId(self.params['id'])})
             if task is None:
                 self.set_output('public_operations', 'record_not_found')
                 return False
             self.output['data']['item'] = _serialize(task)
         else:
             cursor = col_tasks.find({
                 'user_id': str(self.user_id)
             }).sort([('create_date', -1)])
             self.output['data']['list'] = [
                 _serialize(user_task) for user_task in cursor
             ]
         self.set_output('public_operations', 'successful')
     except Exception:
         PrintException()
         return False
     self.allow_action = False
     return True
Esempio n. 10
0
 def before_get(self):
     """Request results ordered by ``create_date`` descending.

     Returns:
         bool: ``True`` once ``self.sort`` has been set, ``False`` if the
         assignment raised.
     """
     try:
         self.sort = {'create_date': -1}
     except Exception:
         PrintException()
         return False
     return True
Esempio n. 11
0
    def post(self, *args, **kwargs):
        """Issue a fresh activation code for a user and send it by SMS.

        When ``pre_post()`` succeeds and a ``mobile`` parameter is present,
        the matching user's ``activation_code`` is replaced and sent using
        the ``forgot_password`` SMS template; an unknown mobile is reported
        as ``user/user_not_exists``. ``after_post()`` is called afterwards
        and the response is always written with ``kmwrite()``.
        """
        self.Print('%s fired' % inspect.stack()[0][3], Colors.GRAY)
        self.method = 'post'
        if self.pre_post():
            try:
                col_users = db()['users']
                activation_code = random_digits()
                if 'mobile' not in self.params:
                    self.set_output('public_operations', 'successful')
                elif col_users.find_one(
                        {'mobile': self.params['mobile']}) is None:
                    self.set_output('user', 'user_not_exists')
                else:
                    col_users.update_one(
                        {'mobile': self.params['mobile']},
                        {'$set': {
                            'activation_code': activation_code
                        }})
                    send_sms(
                        sms['users']['forgot_password'][self.locale] %
                        activation_code, self.params['mobile'])
                    self.set_output('public_operations', 'successful')
                self.after_post()

            except Exception:
                PrintException()
                self.set_output('public_operations', 'failed')
        self.kmwrite()
Esempio n. 12
0
    def before_put(self):
        """Validate and normalise the parameters of a task update.

        Rejects the request with ``tasks/wrong_params`` when a submitted key
        is not in the whitelist below, then parses the optional
        ``reminder_date``, ``from_date`` and ``to_date`` strings (format
        ``%Y-%m-%d %H:%M:%S``) into ``datetime`` objects in place.

        Returns:
            bool: ``True`` when the parameters are acceptable, ``False`` on an
            unknown parameter or a parsing error.
        """
        try:
            # 'reminder_date' is whitelisted because it is parsed below;
            # without it that branch could never be reached.
            inputs = [
                'title', 'from_date', 'to_date', 'tags', 'color',
                'description', 'attachment', 'location', 'remind', 'people',
                'is_done', 'is_favorite', 'id', 'reminder_date'
            ]
            for item in self.params:
                if item not in inputs:
                    self.set_output('tasks', 'wrong_params')
                    return False
            for field in ('reminder_date', 'from_date', 'to_date'):
                if field in self.params:
                    self.params[field] = datetime.strptime(
                        self.params[field], "%Y-%m-%d %H:%M:%S")

        except Exception:
            PrintException()
            return False
        return True
Esempio n. 13
0
    def after_post(self):
        """Send the registration SMS containing the activation code.

        Sending is best effort: any error is reported through
        ``PrintException`` and otherwise ignored.

        Returns:
            bool: Always ``True``.
        """
        try:
            template = sms['users']['registration_successful'][self.locale]
            send_sms(template % self.params['activation_code'],
                     self.params['mobile'])
        except Exception:
            PrintException()
        return True
Esempio n. 14
0
 def before_get(self):
     """Restrict the query to a single document when ``id`` is supplied.

     Returns:
         bool: ``True`` on success, ``False`` when building the ``_id``
         condition raised (for example on a malformed id).
     """
     try:
         if 'id' in self.params:
             self.conditions['_id'] = ObjectId(self.params['id'])
     except Exception:
         PrintException()
         return False
     return True
Esempio n. 15
0
    def put(self, *args, **kwargs):
        """Update the current user and propagate profile changes.

        After ``pre_put()`` succeeds and every parameter is listed in
        ``self.inputs['put']``, the user document is updated with
        ``self.params``. When ``name``, ``family`` or ``pic`` changed, the
        embedded ``people.*`` fields in ``tasks`` and the ``name`` / ``pic``
        fields in ``people`` are updated as well. The response is written
        with ``kmwrite()``.

        Returns:
            ``False`` when an unknown parameter is submitted, otherwise
            ``None``.
        """
        try:
            self.method = 'put'
            self.module = 'users'
            if self.pre_put():

                for item in self.params:
                    if item not in self.inputs['put']:
                        self.set_output('tasks', 'wrong_params')
                        # NOTE(review): this returns before kmwrite(), so the
                        # error output may never reach the client - confirm.
                        return False

                need_consistency_update = any(
                    x in self.params for x in ['pic', 'name', 'family'])
                if 'password' in self.params:
                    self.params['password'] = create_md5(
                        self.params['password'])
                    # NOTE(review): this stores the digest, not the plaintext
                    # that the registration path stores in the same field;
                    # left as is because persisting plaintext is a security
                    # risk - confirm the intended value.
                    self.params['password_pure'] = self.params['password']
                col_users = db()['users']
                col_users.update_one({'_id': ObjectId(self.user_id)},
                                     {'$set': self.params})
                if need_consistency_update:
                    col_people = db()['people']
                    col_tasks = db()['tasks']
                    doc = {
                        'people.' + field: self.params[field]
                        for field in ('name', 'family', 'pic')
                        if field in self.params
                    }
                    col_tasks.update_many({'people.id': self.user_id},
                                          {'$set': doc})
                    people_fields = {
                        field: self.params[field]
                        for field in ('name', 'pic')
                        if field in self.params
                    }
                    # Only 'family' may have changed; an empty update
                    # document would be rejected and turn an otherwise
                    # successful request into a failure.
                    if people_fields:
                        col_people.update_many({'user_id': self.user_id},
                                               {'$set': people_fields})
                self.params['last_update'] = datetime.now()
            self.set_output('public_operations', 'successful')
        except Exception:
            PrintException()
            self.set_output('public_operations', 'failed')
        if consts.LOG_ACTIVE:
            self.log_status(self.output)
        self.after_put()
        self.kmwrite()
Esempio n. 16
0
 def after_get(self, dataset):
     """Convert fetched documents into their client-facing form.

     Each item gets a string ``id`` taken from ``_id``; ``_id``,
     ``create_date``, ``last_update`` and ``user_id`` are removed. Items are
     modified in place.

     Args:
         dataset: Iterable of document dicts.

     Returns:
         list: The converted items. If an item cannot be converted,
         processing stops and the items converted so far are returned.
     """
     temp = []
     try:
         for item in dataset:
             item['id'] = str(item.pop('_id'))
             # A document lacking one of the bookkeeping fields must not
             # truncate the whole result, so remove them tolerantly.
             for field in ('create_date', 'last_update', 'user_id'):
                 item.pop(field, None)
             temp.append(item)
     except Exception:
         PrintException()
     return temp
Esempio n. 17
0
 def before_delete(self):
     """Delete the task identified by the ``id`` parameter.

     Returns:
         bool: ``True`` when a task was deleted (``self.allow_action`` is
         then ``False``); ``False`` when no task has that id or an error
         occurred.
     """
     try:
         col_tasks = db()['tasks']
         # One round trip: the delete result says whether the task existed.
         result = col_tasks.delete_one(
             {'_id': ObjectId(self.params['id'])})
         if result.deleted_count == 0:
             self.set_output('public_operations', 'record_not_found')
             return False
         self.set_output('public_operations', 'successful')
     except Exception:
         PrintException()
         return False
     self.allow_action = False
     return True
Esempio n. 18
0
 def before_post(self):
     """Validate a saved-query request and reject duplicate names.

     Returns:
         bool: ``True`` when the parameters are allowed and no document in
         ``save_task_query`` has the same ``name``; ``False`` otherwise or on
         error.
     """
     try:
         # NOTE(review): other handlers validate against
         # self.inputs['post']; presumably self.inputs is a flat collection
         # here - confirm.
         if any(item not in self.inputs for item in self.params):
             self.set_output('tasks', 'wrong_params')
             return False
         col_save_task = db()['save_task_query']
         name_filter = {'name': self.params['name']}
         if col_save_task.count_documents(name_filter) > 0:
             self.set_output('save_task', 'duplicate_name')
             return False
     except Exception:
         PrintException()
         return False
     return True
Esempio n. 19
0
 def before_delete(self):
     """Delete a user account after re-checking its credentials.

     The document is removed only when ``id``, ``mobile`` and the MD5 of
     ``password`` all match. The outcome is reported as
     ``public_operations/successful`` when a document was deleted,
     ``record_not_found`` when nothing matched, and ``failed`` on error.
     ``self.allow_action`` is always set to ``False``.

     Returns:
         None
     """
     try:
         self.method = 'users'
         col_users = db()['users']
         result = col_users.delete_one({
             '_id': ObjectId(self.params['id']),
             'mobile': self.params['mobile'],
             'password': create_md5(self.params['password'])
         })
         # Do not claim success when the credentials matched no account.
         if result.deleted_count:
             self.set_output('public_operations', 'successful')
         else:
             self.set_output('public_operations', 'record_not_found')
     except Exception:
         PrintException()
         self.set_output('public_operations', 'failed')
     self.allow_action = False
Esempio n. 20
0
def log_error(type, page_url, selector, data, error, source_id, source_link_id, engine_instance_id, module):
    """Count an error and store it in the error-log collection.

    Increments the module-level ``error_count`` and inserts one document
    into ``col_error_logs``. A failure while logging is printed and
    otherwise ignored.

    Args:
        type: Log category stored under ``'type'``.
        page_url: URL being processed when the error occurred.
        selector: Selector that was being applied.
        data: Extra payload stored under ``'data'``.
        error: Error description stored under ``'error'``.
        source_id: Identifier of the source.
        source_link_id: Identifier of the source link.
        engine_instance_id: Identifier of the running engine; stored as a
            string.
        module: Accepted for call compatibility; not stored.
    """
    global error_count
    try:
        error_count += 1
        col_error_logs.insert_one({
            'engine_instance_id': str(engine_instance_id),
            'type': type,
            'source_id': source_id,
            'source_link_id': source_link_id,
            'page_url': page_url,
            'selector': selector,
            'data': data,
            'date': datetime.now(),
            'error': error,
        })
    except Exception:
        PrintException()
        print('LOG PRODUCED LOG!')
Esempio n. 21
0
 def post(self, *args, **kwargs):
     """Insert a new user document built from the request parameters.

     When ``pre_post()`` succeeds, ``self.added_data`` is merged into
     ``self.params``, creation/update timestamps are added and the document
     is inserted into ``users``. The new id is exposed as ``self.id`` and in
     ``self.output['data']['item']['id']``, after which ``after_post()``
     runs. The response is always written with ``kmwrite()``.
     """
     self.Print('%s fired' % inspect.stack()[0][3], Colors.GRAY)
     try:
         self.method = 'post'
         if self.pre_post():
             self.params.update(self.added_data)
             col_users = db()['users']
             self.params['create_date'] = datetime.now()
             self.params['last_update'] = datetime.now()
             new_id = str(col_users.insert_one(self.params).inserted_id)
             self.id = new_id
             self.output['data']['item']['id'] = new_id
             self.set_output('public_operations', 'successful')
             self.after_post()
         if consts.LOG_ACTIVE:
             self.log_status(self.output)
     except Exception:
         PrintException()
         self.set_output('public_operations', 'failed')
     self.kmwrite()
Esempio n. 22
0
 def post(self, *args, **kwargs):
     """Store an uploaded image and return its public link.

     The file from the ``image`` form field is written to
     ``<PDP_IMAGES><type>/<today>/<md5><ext>``, creating missing directories,
     and the matching ``ODP_IMAGES`` link is placed in
     ``data['data']['item']``. A missing or unsafe ``type`` argument is
     reported as ``field_error/file_type``. The response body is a copy of
     the module-level ``output`` template.
     """
     data = deepcopy(output)
     try:
         upload = self.request.files['image'][0]
         file_contents = upload['body']
         file_name = upload['filename']
         image_type = self.get_argument('type', '')
         # Take the extension from the last path component only, so a
         # crafted filename cannot smuggle directory separators into the
         # destination path.
         base_name = os.path.basename(file_name.replace('\\', '/'))
         file_ext = '.' + base_name.split('.')[-1]
         for directory in (consts.PDP_ROOT, consts.PDP_IMAGES):
             if not os.path.exists(directory):
                 os.mkdir(directory)
         # ``type`` is client supplied and becomes a directory name: accept
         # only a single plain path component.
         is_plain_type = (image_type not in ('', '.', '..')
                          and '/' not in image_type
                          and '\\' not in image_type)
         if is_plain_type:
             # Compute the date once so the directory and the file path
             # cannot disagree around midnight.
             today = datetime.today().date()
             type_dir = consts.PDP_IMAGES + image_type
             for directory in (type_dir, type_dir + '/' + str(today)):
                 if not os.path.exists(directory):
                     os.mkdir(directory)
             filename = create_md5(str(datetime.now()) +
                                   file_name) + file_ext
             target = '%s/%s/%s' % (type_dir, today, filename)
             with open(target, 'wb') as image_file:
                 image_file.write(file_contents)
             self.set_output('public_operations', 'successful')
             data['data']['item'] = {
                 'link':
                 '%s/%s/%s' % (consts.ODP_IMAGES + image_type, today,
                               filename)
             }
         else:
             self.set_output('field_error', 'file_type')
     except Exception:
         self.set_output('public_operations', 'failed')
         PrintException()
     self.write(data)
Esempio n. 23
0
def do_work(item_info):
    global urls_hash_list
    global content_list
    global logs_list
    global count_mongo
    global error_count
    source_link = item_info['source_link']
    source = item_info['source']
    global count
    count += 1
    try:
        global link_count
        global new_contents
        link_count += 1
        print(link_count)
        html = get_page(source_link['url'])

        if html is not None or html != '':
            # print('html')
            for item in html.select(source_link['box']):
                try:
                    if source_link['link'] == '':
                        href = item['href']
                    else:
                        href = item.select(source_link['link'])
                        href = href[0]['href'] if len(href) != 0 else ''
                except:
                    PrintException()
                    href = ''
                    logs_list.append({'type':'extract_link', 'page_url':source_link['url'], 'selector':source_link['link'], 'data':{},
                                      'error':PrintException(), source_id:str(source['_id']),
                                      'source_link_id':str(source_link['_id']), 'engine_instance_id':engine_instance_id,
                                      'module':'link_grabber',
                                      'date':datetime.now()})
                    error_count += 1
                    # log_error(type='extract_link', page_url=source_link['url'], selector=source_link['link'], data={},
                    #           error=PrintException(), source_id=str(source['_id']),
                    #           source_link_id=str(source_link['_id']), engine_instance_id=engine_instance_id,
                    #           module='link_grabber')

                if href[:2] == '..': href = href.replace('..', '')
                # if href != '' and href[0] != '/': href = '/'+href
                # if col_news.count_documents({'url': source_link['base_url'] + item.select(source_link['link'])[0]['href']}) == 0:

                # col_counter = col_news.count_documents({'url': source_link['base_url'] + href})
                # count_mongo += 1
                # if col_counter == 0:
                try:
                    if source_link['base_url'] not in href:
                        url = source_link['base_url'] + href
                    else:
                        url = href
                except:
                    PrintException()
                    url = ''
                    logs_list.append(
                        {'type': 'read_url', 'page_url': source_link['url'], 'selector': '',
                         'data': {},
                         'error': PrintException(), source_id: str(source['_id']),
                         'source_link_id': str(source_link['_id']), 'engine_instance_id': engine_instance_id,
                         'module': 'link_grabber',
                         'date':datetime.now()})
                    error_count += 1
                    # log_error(type='read_url', page_url=source_link['url'], selector='', data={},
                    #           error=PrintException(), source_id=str(source['_id']),
                    #           source_link_id=str(source_link['_id']), engine_instance_id=engine_instance_id, module='link_grabber')
                if source_link['title'] != '':
                    try:
                            title = item.select(source_link['title'])
                            # if title is None or title != '':
                            #     print('empty title')
                            title = title[0].text.strip()
                            # if len(title) != 0 else ''

                    except:
                        try:
                            # print('title')
                            title = item.select('div.box_economic > h3 > a')[0].text
                        except:
                            try:
                                title = item.select('div.box_economic > h1 > a')[0].text
                            except:
                                # print('title nashod')
                                PrintException()
                                title = ''
                                logs_list.append(
                                    {'type': 'extract_title', 'page_url': source_link['url'], 'selector': source_link['title'],
                                     'data': {},
                                     'error': PrintException(), source_id: str(source['_id']),
                                     'source_link_id': str(source_link['_id']),
                                     'engine_instance_id': engine_instance_id,
                                     'module': 'link_grabber',
                                     'date':datetime.now()})
                                error_count += 1
                else:
                    title = ''
                            # log_error(type='extract_title', page_url=source_link['url'], selector=source_link['title'], data={},
                            #           error=PrintException(), source_id=str(source['_id']),
                            #           source_link_id=str(source_link['_id']), engine_instance_id=engine_instance_id, module='link_grabber')
                if source_link['summary'] != '':
                    try:
                        summary = item.select(source_link['summary'])
                        summary = summary[0].text.strip()
                        # if len(summary) != 0 else ''
                    except:
                        # print('summary error')
                        PrintException()
                        summary = ''
                        logs_list.append(
                            {'type': 'extract_summary', 'page_url': source_link['url'], 'selector': source_link['summary'],
                             'data': {},
                             'error': PrintException(), source_id: str(source['_id']),
                             'source_link_id': str(source_link['_id']),
                             'engine_instance_id': engine_instance_id,
                             'module': 'link_grabber',
                             'date':datetime.now()})
                    error_count += 1
                else:
                    summary = ''
                    # log_error(type='extract_summary', page_url=source_link['url'], selector=source_link['summary'],
                    #           data={}, error=PrintException(), source_id=str(source['_id']),
                    #           source_link_id=str(source_link['_id']), engine_instance_id=engine_instance_id, module='link_grabber')

                if source_link['date'] != '':
                    try:
                        date = item.select(source_link['date'])
                        date = date[0].text.strip()
                        # if len(date) != 0 else ''
                    except:
                        PrintException()
                        date = ''
                        logs_list.append(
                            {'type': 'extract_date', 'page_url': source_link['url'], 'selector': source_link['date'],
                             'data': {},
                             'error': PrintException(), source_id: str(source['_id']),
                             'source_link_id': str(source_link['_id']),
                             'engine_instance_id': engine_instance_id,
                             'module': 'link_grabber',
                             'date':datetime.now()})
                        error_count += 1
                else:
                    # print('date')
                    date = ''
                    # log_error(type='extract_date', page_url=source_link['url'], selector=source_link['date'], data={},
                    #           error=PrintException(), source_id=str(source['_id']),
                    #           source_link_id=str(source_link['_id']), engine_instance_id=engine_instance_id, module='link_grabber')
                if source_link['image'] != '':
                    try:
                        selected = item.select(source_link['image'])
                        try:
                            image = selected[0]['data-src']
                            # if len(selected) != 0 else ''
                        except:
                            image = selected[0]['src']
                        if 'http' not in image:
                            if image != '' and image[0] != '/': image = '/'+image
                            image = source_link['base_url']+image
                    except:
                        PrintException()
                        image = ''
                        logs_list.append(
                            {'type': 'extract_image', 'page_url': source_link['url'], 'selector': source_link['image'],
                             'data': {},
                             'error': PrintException(), source_id: str(source['_id']),
                             'source_link_id': str(source_link['_id']),
                             'engine_instance_id': engine_instance_id,
                             'module': 'link_grabber',
                             'date':datetime.now()})
                        error_count += 1
                else:
                    image = ''
                    # log_error(type='extract_image', page_url=source_link['url'], selector=source_link['image'],
                    #           data={}, error=PrintException(), source_id=str(source['_id']),
                    #           source_link_id=str(source_link['_id']), engine_instance_id=engine_instance_id, module='link_grabber')
                url_hash = create_md5(url)
                urls_hash_list.append(url_hash)
                new_contents += 1
                # if col_news.count_documents({'url_hash': url_hash}) == 0:
                content_list.append({
                    'source_id': str(source['_id']),
                    'link_grabber_id': engine_instance_id,
                    'source_link_id': str(source_link['_id']),
                    'source_name': source['name'],
                    'create_date': datetime.now(),
                    'last_update': datetime.now(),
                    'url': url.encode('utf-8'),
                    'url_hash': url_hash,
                    'title': title.encode('utf-8'),
                    'summary': summary.encode('utf-8'),
                    'date': date,
                    'source_url': source_link['url'],
                    'status': 'summary',
                    'image': image,
                    'text_selector': source_link['text'],
                    'category_id': source_link['category_id'] if 'category_id' in source_link else '',
                    'text': '',
                    'html': '',
                })
        else:
            print(html)
            logs_list.append(
                {'type': 'get html', 'page_url': source_link['url'], 'selector': source_link['image'],
                 'data': {},
                 'error': PrintException(), source_id: str(source['_id']),
                 'source_link_id': str(source_link['_id']),
                 'engine_instance_id': engine_instance_id,
                 'module': 'link_grabber',
                 'date':datetime.now()})
    except Exception as e:
        # print(e)
        logs_list.append(
            {'type': 'read_url', 'page_url': source_link['url'], 'selector': '',
             'data': {},
             'error': PrintException(), source_id: str(source['_id']),
             'source_link_id': str(source_link['_id']),
             'engine_instance_id': engine_instance_id,
             'module': 'link_grabber',
             'date':datetime.now()})
Esempio n. 24
0
    try:
        # Forward up to 20 log documents that have not been sent yet to the
        # local collector endpoint, then flag them as sent.
        col_logs = db()['logs']
        logs_list = []
        ids_list = []

        for item in col_logs.find({'sent': {'$exists': False}}).limit(20):
            ids_list.append(item['_id'])
            # The ObjectId is dropped and the date stringified before the
            # document is handed to dumps() (presumably for serialisation).
            del item['_id']
            item['date'] = str(item['date'])
            logs_list.append(item)

        # With nothing pending there is nothing to post, and par[0] below
        # would raise IndexError on the empty list.
        if logs_list:
            # SECURITY(review): hard-coded admin JWT committed to source.
            # It should be rotated and loaded from configuration/environment.
            ADMIN_TOKEN = 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VyX2lkIjoiNWRjZDU4MDZiZWU4YTZhODFjMzE0MDhkIiwicm9sZSI6ImFkbWluIiwiZGF0ZSI6IjIwMTktMTEtMTQgMTc6MDg6NDMuNDAzNDA4In0.QUCCJHXYcSMA-pyHFCQn8zBiHS7C48MpoFAS1T9JUWk'
            logs = dumps(logs_list)
            params = {'params': logs, 'token': ADMIN_TOKEN}
            par = loads(params['params'])
            print(par[0])
            response = requests.post("http://localhost:8585/v1/collect",
                                     json=params)
            # Only mark the logs as sent when the collector accepted them;
            # a 4xx/5xx answer raises here and skips the update below, so the
            # same documents are retried on the next run.
            response.raise_for_status()

            col_logs.update_many({'_id': {
                '$in': ids_list
            }}, {'$set': {
                'sent': True
            }})
    except Exception:
        PrintException()
except:
    PrintException()
Esempio n. 25
0
    def before_get(self):
        """Run every saved task query of the current user and output the results.

        Each document of the ``save_task_query`` collection that belongs to
        ``self.user_id`` is translated into a MongoDB filter for the ``tasks``
        collection. The following optional fields of the saved document are
        read:

        tags
            List of tags; added to the filter as ``{'$in': tags}``.
        reminder_date
            ``"%Y-%m-%d %H:%M:%S"`` string, matched exactly.
        from
            Anchor date: ``'now'`` or a ``"%Y-%m-%d %H:%M:%S"`` string.
            A missing value is treated as ``'now'``.
        time
            ``'now'``, ``'future'`` or ``'pass'`` (i.e. past).
        amount
            Number of days the anchor is shifted by (forward for ``'future'``,
            backward for ``'pass'``).
        type_date
            Name of the task field the date filter applies to
            (``'from_date'`` or ``'to_date'``).
        time_point
            ``'after'``: the field is at/after the shifted date for
            ``'future'``, at/before it for ``'pass'``.
            ``'in'``: the field equals the shifted date exactly.
            ``'to'``: the field lies between the anchor and the shifted date.

        With ``time == 'now'`` the filter selects tasks running at the anchor
        (``from_date <= anchor <= to_date``).

        Example: "all tasks that start one week from now or later and are
        tagged sport or art" is saved as ``type_date='from_date'``,
        ``from='now'``, ``time='future'``, ``amount=7``,
        ``time_point='after'``, ``tags=['sport', 'art']``.

        On success ``self.output['data']['list']`` holds one
        ``{'id', 'name', 'result'}`` entry per saved query. Any exception is
        reported through ``PrintException()``. ``self.allow_action`` is always
        set to ``False`` at the end.
        """
        date_format = "%Y-%m-%d %H:%M:%S"
        try:
            queries = []
            col_saved_tasks = db()['save_task_query']
            for item in col_saved_tasks.find({'user_id': self.user_id}):
                query = {'user_id': self.user_id}
                if item.get('tags'):
                    query['tags'] = {'$in': item['tags']}

                if 'reminder_date' in item:
                    query['reminder_date'] = datetime.strptime(
                        item['reminder_date'], date_format)

                # Resolve the anchor for every saved query. Previously a
                # missing 'from' left date_point unbound, or silently reused
                # the anchor of the previous saved query.
                if item.get('from', 'now') == 'now':
                    date_point = datetime.now().replace(microsecond=0)
                else:
                    date_point = datetime.strptime(item['from'], date_format)

                time_mode = item.get('time')
                time_point = item.get('time_point')
                if time_mode == 'now':
                    query['$and'] = [
                        {'from_date': {'$lte': date_point}},
                        {'to_date': {'$gte': date_point}},
                    ]
                elif (time_mode in ('pass', 'future')
                      and time_point in ('after', 'in', 'to')
                      and 'type_date' in item and 'amount' in item):
                    delta = timedelta(days=item['amount'])
                    if time_mode == 'pass':
                        shifted = date_point - delta
                    else:
                        shifted = date_point + delta

                    if time_point == 'after':
                        bound = '$lte' if time_mode == 'pass' else '$gte'
                        date_filter = {bound: shifted}
                    elif time_point == 'in':
                        date_filter = shifted
                    else:
                        # 'to': order the bounds explicitly. For 'pass' the
                        # shifted date is earlier than the anchor, so using
                        # the anchor as lower bound could never match.
                        date_filter = {
                            '$gte': min(date_point, shifted),
                            '$lte': max(date_point, shifted),
                        }
                    query[item['type_date']] = date_filter

                queries.append({
                    'id': str(item['_id']),
                    'name': item['name'],
                    'query': query
                })

            results = []
            col_tasks = db()['tasks']
            date_fields = ('create_date', 'last_update', 'from_date',
                           'to_date')
            for saved in queries:
                result_list = []
                for task in col_tasks.find(saved['query']):
                    task['id'] = str(task['_id'])
                    del task['_id']
                    # Dates are emitted as strings in the response payload.
                    for field in date_fields:
                        if field in task:
                            task[field] = str(task[field])
                    result_list.append(task)
                results.append({
                    'id': saved['id'],
                    'name': saved['name'],
                    'result': result_list
                })
            self.set_output('public_operations', 'successful')
            self.output['data']['list'] = results
        except Exception:
            PrintException()
        self.allow_action = False
Esempio n. 26
0
    def get(self, id=None, *args, **kwargs):
        """Output the profile of the user identified by ``self.user_id``.

        The projection is taken from ``self.fields`` when it is non-empty,
        otherwise a default set of profile fields is used. The document's
        ``_id`` is returned as a string under ``id``.

        Outputs:
            ``public_operations/successful`` with the user in
            ``self.output['data']['item']`` on success;
            ``field_error/id_format`` when the user cannot be loaded (invalid
            id, no matching document, or any error during the lookup);
            ``public_operations/failed`` when ``pre_get()`` itself raises.

        The ``id``, ``*args`` and ``**kwargs`` parameters are accepted but not
        used by this handler. ``self.kmwrite()`` is always called last.
        """
        self.Print('%s fired' % inspect.stack()[0][3], Colors.GRAY)
        try:
            self.method = 'get'
            if self.pre_get():
                try:
                    if self.fields:
                        fields = {name: 1 for name in self.fields}
                    else:
                        fields = {
                            'name': 1,
                            'family': 1,
                            'email': 1,
                            'pic': 1,
                            'tasks_figure': 1,
                            'mobile': 1
                        }
                    user_info = db()['users'].find_one(
                        {'_id': ObjectId(self.user_id)}, fields)
                    if user_info is None:
                        # No such user: same response the original produced
                        # via the TypeError raised on subscripting None.
                        self.set_output('field_error', 'id_format')
                    else:
                        user_info['id'] = str(user_info.pop('_id'))
                        self.output['data']['item'] = user_info
                        self.set_output('public_operations', 'successful')
                except Exception:
                    PrintException()
                    self.set_output('field_error', 'id_format')
        except Exception:
            PrintException()
            self.set_output('public_operations', 'failed')
        self.kmwrite()
Esempio n. 27
0
def crawl_engine(item, engine_instance_id):
    """Download one news page and extract the article element and its text.

    Args:
        item: news document; the keys 'url', 'text_selector',
            'source_link_id' and 'source_id' are read.
        engine_instance_id: identifier passed through to ``log()``.

    Returns:
        Tuple ``(status, news_text, news_html, log_list, error_count)``.
        ``status`` is one of '', 'text', 'Empty', 'error_get_url',
        'error_bs4', 'error_selector' or 'error_text'. ``news_html`` is the
        selected element, ``''`` after a selector error, an empty list when
        nothing matched, or ``None`` when the page could not be downloaded.
    """
    global log_list
    log_list = []
    news_html = None
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
    }
    status = ''
    news_text = ''

    # SECURITY(review): verify=False disables TLS certificate verification
    # for both attempts; kept because the original relied on it.
    try:
        # Keep the Response object itself. Storing .status_code here made
        # result an int, so result.text failed for every first-try download.
        result = requests.get(url=item['url'], headers=headers, verify=False)
    except Exception:
        try:
            # Second attempt without the custom User-Agent header.
            result = requests.get(item['url'], verify=False)
        except Exception:
            status = 'error_get_url'
            result = None
            log(type='get_url',
                page_url=item['url'],
                selector='',
                data={},
                error=PrintException(),
                engine_instance_id=engine_instance_id,
                source_id=item['source_id'])

    # Parse only when a response was obtained. The original condition
    # ("!= '' or is not None") was always true, so a failed download was
    # still parsed and 'error_get_url' was overwritten.
    if result is not None:
        try:
            html = BeautifulSoup(result.text, 'html.parser')
        except Exception:
            html = ''
            status = 'error_bs4'
        try:
            news_html = html.select(item['text_selector'])
            if len(news_html) > 0:
                news_html = news_html[0]
                try:
                    news_html = remove_hrefs(news_html)
                except Exception:
                    status = 'error_remove_href'
                # NOTE(review): this overwrites 'error_remove_href' set just
                # above; kept as in the original.
                status = 'text'
                source_link_info = col_source_links.find_one(
                    {'_id': ObjectId(item['source_link_id'])})
                # find_one returns None for an unknown id; without the guard
                # the membership test raised and discarded the selected text.
                if source_link_info and 'exclude' in source_link_info:
                    for exclude in source_link_info['exclude']:
                        for ex in news_html.select(exclude):
                            ex.decompose()
            else:
                status = 'Empty'
                log(type='read_text',
                    page_url=item['url'],
                    selector=item['text_selector'],
                    data={},
                    error='Empty',
                    engine_instance_id=engine_instance_id,
                    source_id=item['source_id'])
        except Exception as e:
            news_html = ''
            status = 'error_selector'
            log(type='read_text',
                page_url=item['url'],
                selector=item['text_selector'],
                data={},
                error=str(e),
                engine_instance_id=engine_instance_id,
                source_id=item['source_id'])

    if news_html is not None and news_html != '' and len(news_html) > 0:
        try:
            news_text = news_html.text
        except Exception:
            news_text = ''
            status = 'error_text'
            log(type='read_text',
                page_url=item['url'],
                selector=item['text_selector'],
                data={},
                error=PrintException(),
                engine_instance_id=engine_instance_id,
                source_id=item['source_id'])

    # NOTE(review): error_count is never assigned in this function, so it is
    # looked up as a module-level name; confirm it is maintained elsewhere.
    return status, news_text, news_html, log_list, error_count