Code example #1
def count_reactions(client):
    # legacy_reactions = get_legacy_reactions()
    database = DataBase()
    for user in Session().query(SlackUser).all():
        reactions_response = client.reactions_list(user=user.id)
        items = reactions_response.data['items']
        for item in items:
            channel = item['channel']
            message = item['message']
            ts = message['ts']
            author = message['user']
            reactions = message['reactions']
            # if user.id in legacy_reactions.keys() and ts == legacy_reactions[user.id]:
            #     break
            for reaction in reactions:
                if user.id in reaction['users']:
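                    # reaction names may carry skin-tone suffixes ('thumbsup::skin-tone-2'); keep only the base name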
                    reaction_code = reaction['name'].split(':')[0]
                    if database.select(Reaction,
                                       channel=channel,
                                       timestamp=ts,
                                       sender=user.id,
                                       receiver=author,
                                       name=reaction_code) is None:
                        database.insert(
                            Reaction(channel, ts, reaction_code, user.id,
                                     author))
Code example #2
def start():
    database = DataBase()
    with open(current_path + '/course_list.csv', 'r') as file:
        reader = csv.DictReader(file)
        for row in reader:
            course = Course(row['name'], row['tag'], row['url'], True)
            database.insert(course)
    # truncate the same file once its courses have been imported
    with open(current_path + '/course_list.csv', 'w') as file:
        file.write('')
Code example #3
def structure_message(notification):
    database = DataBase()
    author = database.select(Author, id=notification.author)
    output_string = '>*' + notification.title + '*\n>\n>'
    output_string += str(notification.text).replace('\n', '\n>') + '\n>\n>'
    output_string += '*' + author.first_name + ' ' + author.last_name + '*' + ' ' * 10 + parse_date(
        str(notification.publish_date)) + '\n\n'
    output_string += '[src: ' + notification.link + ']'
    return output_string
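For illustration, the function above yields Slack blockquote formatting along these lines (the notification values and the exact parse_date output are made up for the sketch):

>*Exam schedule*
>
>The midterm exam starts at 9:00.
>
>*Jane Doe*          01.03.2021.

[src: https://example.org/notification]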
Code example #4
File: command_api.py  Project: jmajaca/infobot-public
def command_endpoint():
    database = DataBase()
    data, data_ok = SlackCommandUtils.read_data(request, text_tokens_length=1)
    if not data_ok:
        return 'Invalid request format', 400
    command = data['text']
    if command != 'start' and command != 'stop':
        return 'Invalid request format', 400
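    # forward the start/stop command to the scraper control endpoint of the Flask UI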
    response = requests.get(default_protocol + '://' + config.get('flask_address') + ':' + config.get('flask_port') +
                            '/ui/home/scraper/' + command)
    success_flag = response.status_code == 200
    slack_command_log = SlackCommandUtils.create_slack_command_log(data, success_flag)
    database.insert(slack_command_log)
    if response.status_code == 200:
        return '', 200
    else:
        return 'Error has occurred while commanding scraper', 500
Code example #5
def __init__(self, link: str, payload: dict, **kwargs):
    self.database = DataBase()
    self.progress_queue_manager = ProgressQueueManager()
    self.logger = Logger()
    self.link = link
    self.payload = payload
    self.date_format = {
        'small': '%Y-%m-%dT%H:%M',
        'medium': '%Y-%m-%dCET%H:%M',
        'large': '%Y-%m-%dCEST%H:%M'
    }
    self.headers = {'User-Agent': 'Mozilla/5.0'}
    self.html_parser = 'html.parser'
    if kwargs and 'headers' in kwargs:
        self.headers = kwargs['headers']
    if kwargs and 'html_parser' in kwargs:
        self.html_parser = kwargs['html_parser']
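This constructor also appears as part of the full WebScraper class in code example #13. A minimal construction sketch, assuming illustrative placeholder values (the link and credentials below are not from the project):

scraper = WebScraper('https://www.fer.unizg.hr',
                     {'username': 'user', 'password': 'pass'},
                     html_parser='lxml')  # kwargs override the 'html.parser' default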
Code example #6
def get_reaction_table():
    database = DataBase()
    data, data_ok = SlackCommandUtils.read_data(request=request, text_tokens_length=2)
    if not data_ok:
        return 'Invalid request format', 400
    try:
        reaction_name = data['text'][1][1:-1]  # strip the surrounding colons from ':reaction:'
        if data['text'][0] == 'r':
            result = reaction_manager.get_top_receivers(reaction_name=reaction_name)
        elif data['text'][0] == 's':
            result = reaction_manager.get_top_senders(reaction_name=reaction_name)
        else:
            return 'Invalid request format', 400
        response = {'response_type': 'in_channel'}
        response_text = 'Here is top chart for :' + reaction_name + ':\n\n'
        for i, info in enumerate(result):
            response_text += str(i + 1) + '. <@' + database.select(SlackUser, name=info[0]).id + '> with ' + str(info[1]) + ' total '
            if data['text'][0] == 'r':
                response_text += 'received'
            elif data['text'][0] == 's':
                response_text += 'sent'
            response_text += ' reactions\n'
        response['text'] = response_text
        slack_command_log = SlackCommandUtils.create_slack_command_log(data, True)
        database.insert(slack_command_log)
        return jsonify(response), 200
    except Exception as e:
        slack_command_log = SlackCommandUtils.create_slack_command_log(data, False)
        database.insert(slack_command_log)
        return '', 500
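From the parsing above, the command text is expected to be two tokens, e.g. 'r :thumbsup:' for the top receivers chart or 's :thumbsup:' for the top senders chart; the [1:-1] slice strips the surrounding colons from the reaction name. (The slash command name itself is not visible in this snippet.)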
Code example #7
File: course_api.py  Project: jmajaca/infobot-public
def archive_course():
    database = DataBase()
    data, data_ok = SlackCommandUtils.read_data(request=request,
                                                text_tokens_length=1)
    if not data_ok:
        return 'Invalid request format', 400
    response = requests.post(default_protocol + '://' +
                             config.get('flask_address') + ':' +
                             config.get('flask_port') +
                             '/ui/channel/archive?tag=' + data['text'][1:])
    success_flag = response.status_code == 200
    slack_command_log = SlackCommandUtils.create_slack_command_log(
        data, success_flag)
    database.insert(slack_command_log)
    if success_flag:
        return '', 200
    else:
        return 'Error has occurred while archiving the channel', 500
Code example #8
from flask import Blueprint, Response

from src.main import client
from src.main.objects.scanner import Scanner
from src.models.base import DataBase
from src import Logger

app_nav_bar = Blueprint('app_nav_bar', __name__, template_folder='templates')
scanner = Scanner(client, DataBase())
logger = Logger()


@app_nav_bar.route('/scan/reactions', methods=['GET'])
def scan_reactions():
    try:
        scanner.scan_reactions()
        return Response(status=200)
    except Exception as e:
        logger.error_log(e)
        return Response(status=500)


@app_nav_bar.route('/scan/users', methods=['GET'])
def scan_users():
    try:
        scanner.scan_users()
        return Response(status=200)
    except Exception as e:
        logger.error_log(e)
        return Response(status=500)
Code example #9
def main():
    logger.info_log('Started populating database.')
    database = DataBase()
    client = slack.WebClient(token=sys.argv[1])
    response = client.users_list()
    # insert users
    for user in response['members']:
        if database.select(SlackUser, id=user['id']) is None:
            database.insert(SlackUser(user['id'], user['name']))
    # insert channels
    response = client.conversations_list()
    for channel in response['channels']:
        date_created = datetime.fromtimestamp(channel['created'])
        author_id = channel['creator']
        if database.select(Channel, id=channel['id']) is None:
            database.insert(
                Channel(channel['id'], '#' + channel['name'], author_id,
                        date_created))
    # insert courses
    courses = [('Skriptni jezici', '#skriptni'),
               ('Umjetna inteligencija', '#ai'),
               ('Statistička analiza podataka', '#sap'),
               ('Trgovačko pravo', '#pravo'),
               ('Interaktivna računalna grafika', '#irg'),
               ('Završni rad', '#završni'),
               ('Napredno korištenje operacijskog sustava Linux', '#linux')]
    for course in courses:
        if database.select(Course, name=course[0],
                           channel_tag=course[1]) is None:
            database.insert(Course(*course))
    # insert reactions
    response = client.users_list()
    count_reactions(client)
    # if a legacy reactions file is given, insert the legacy reactions (without channel and timestamp attributes)
    if len(sys.argv) == 3:
        count_legacy_reactions(response, database, sys.argv[2])
    logger.info_log('Ended populating database.')
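Judging by the sys.argv usage above, the script takes a Slack API token and an optional path to a legacy reactions file (the script name below is a guess):

python populate_database.py xoxb-your-token [legacy_reactions.csv]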
Code example #10
from flask import Blueprint, render_template, Response, request, redirect, url_for

from src.main import client
from src.main.objects.reaction_scrapper import ReactionScrapper
from src.models.base import DataBase, Session
from src import Logger
from src.main.objects.reaction_manager import ReactionManager
import re

app_reaction = Blueprint('app_reaction', __name__, template_folder='templates')
logger = Logger()
reaction_scrapper = ReactionScrapper(client, DataBase(), logger)
reaction_manager = ReactionManager(logger)


@app_reaction.route('/ui/reaction/<name>', methods=['GET', 'POST'])
def get_reactions(name):
    senders, receivers, top_channels, latest_reactions = reaction_manager.get_top_all(reaction_name=name)
    return render_template('reaction.html', senders=senders, receivers=receivers, top_channels=top_channels,
                           name=name,
                           latest_reactions=latest_reactions, alive=reaction_scrapper.is_alive()), 200


@app_reaction.route('/ui/reaction/scan', methods=['GET'])
def scan_reactions():
    try:
        reaction_scrapper.count()
        return Response(status=200)
    except Exception as e:
        logger.error_log(e)
        return Response(status=500)
Code example #11
from flask import Blueprint, render_template, request, Response

from src import Logger
from src.main import client
from src.main.objects.reminder_manager import ReminderManager
from src.models.base import DataBase
from src.web_app.services import reminder_service


app_reminder = Blueprint('app_reminder', __name__, template_folder='templates')
logger = Logger()
reminder_manager = ReminderManager(client, DataBase(), logger)  # used for all interactions with database

@app_reminder.route('/ui/reminder', methods=['GET'])
def get_reminders():
	"""
	default mapping with all reminders
	"""
	reminders = reminder_manager.get_reminders()
	courses, authors = reminder_manager.get_filter_options()
	return render_template('reminder.html', courses=courses, authors=authors, reminders=reminders), 200


@app_reminder.route('/ui/reminder/filter', methods=['POST'])
def filter_reminders():
	"""
	mapping for returning only filtered reminders
	params: name : name of the course
			author : first and last name of the author
			from : all dates after it
			to : all dates before it
Code example #12
def start_scraper_process():
    progress_queue.put((NONE_PROGRESS, 'Starting scraper'))
    database = DataBase()
    logger.info_log('Program started.')
    # refresh_active_courses.start()
    # count_reactions(client)
    timeout = 600
    try:
        loop_count = 0
        courses = database.select_many(Course, watch=True)
        while True:
            check_pins(client, logger)
            check_reminders(client, logger)
            notifications = scraper.start(courses)
            print('Scraping phase done.')
            # TODO: catch this exception in main instead
            if notifications is None:
                progress_queue.put((INIT_PROGRESS + SCRAPE_PROGRESS, None, 'warning'))
                # back off exponentially, capped at 40 minutes, when scraping failed
                timeout *= 2
                time.sleep(min(timeout, 2400))
                notifications = []
            else:
                timeout = 600
            for i, notification in enumerate(notifications):
                result = database.select(Notification, title=notification.title, site=notification.site,
                                         author=notification.author, publish_date=notification.publish_date,
                                         text=notification.text, link=notification.link)
                if result is None:
                    fresh_notification = notification.publish_date + timedelta(hours=24) >= datetime.now()
                    course = database.select(Course, id=notification.site)
                    if check_filters(notification) and fresh_notification:
                        channel = database.select(Channel, tag=course.channel_tag)
                        # check if course.channel_tag is enough (in place of channel.id)
                        response = client.chat_postMessage(channel=channel.id,
                                                           text=structure_message(notification))
                    database.insert(notification)
                    if check_filters(notification) and fresh_notification:
                        for reminder in generate_reminders(notification):
                            database.insert(reminder)
                        # https://api.slack.com/methods/pins.add
                        client.pins_add(channel=response['channel'], timestamp=response['ts'])
                        database.insert(Pin(datetime.now(), timedelta(hours=24), response['channel'], response['ts']))
                progress_queue.put((INIT_PROGRESS + SCRAPE_PROGRESS + int(SAVE_PROGRESS/(len(notifications))) * (i+1), None))
            gc.collect()
            loop_count += 1
            # every 10th iteration, rescan reactions and refresh the watched course list
            if loop_count == 10:
                count_reactions(client)
                courses = database.select_many(Course, watch=True)
                loop_count = 0
            progress_queue.put((DONE_PROGRESS, 'Scraping done', 'sleep'))
            time.sleep(60)
            progress_queue.put((NONE_PROGRESS, 'Starting scraper'))
    except Exception as e:
        logger.error_log(e)
    finally:
        error_channel = '#random'
        client.chat_postMessage(channel=error_channel, text='I am dead.')
        logger.info_log('Program finished with exit code 1.')
        progress_queue.put((NONE_PROGRESS, 'Program finished with exit code 1', 'error'))
        exit(1)
Code example #13
class WebScraper:
    """
    A class that scrapes FER Web for notifications

    Attributes
    ----------
    database: DataBase
        an object responsible for communicating with the database
    progress_queue_manager: ProgressQueueManager
        an object responsible for tracking progress while scraping notifications
    logger : Logger
        an object that saves scraper logs to a predefined file
    link : str
        a link to the FER web page from which data will be scraped
    payload: dict
        a dictionary containing the payload (e.g. username and password) for logging into the target website
    date_format: dict
        a dictionary containing the date formats found in web notifications
    headers: dict
        an optional dictionary sent as HTTP headers when scraping
    html_parser: str
        an optional parameter defining which HTML parser to use on the scraped raw HTML

    Methods
    -------
    start(courses: list[Course]) -> list[Notification]
        starts the wrapped process of scraping notifications for the target courses
    generate_notifications(courses: list[Course]) -> list[Notification]
        scrapes notifications for the target courses
    _check_author(author_string: str) -> Author
        looks up the author and saves it to the database if it doesn't exist
    _check_date(date_elements: list)
        returns the most recent date based on date_format
    _check_text(text: str) -> str
        parses the notification text into a Slack-ready message
    """
    def __init__(self, link: str, payload: dict, **kwargs):
        self.database = DataBase()
        self.progress_queue_manager = ProgressQueueManager()
        self.logger = Logger()
        self.link = link
        self.payload = payload
        self.date_format = {
            'small': '%Y-%m-%dT%H:%M',
            'medium': '%Y-%m-%dCET%H:%M',
            'large': '%Y-%m-%dCEST%H:%M'
        }
        self.headers = {'User-Agent': 'Mozilla/5.0'}
        self.html_parser = 'html.parser'
        if kwargs and 'headers' in kwargs:
            self.headers = kwargs['headers']
        if kwargs and 'html_parser' in kwargs:
            self.html_parser = kwargs['html_parser']

    def start(self, courses):
        notifications = []
        try:
            notifications = self.generate_notifications(courses)
        except errors.LoginError as e:
            notifications = None
            self.logger.warning_log(e.text)
        except Exception as e:
            notifications = []
            self.logger.error_log(
                e, text='Error has occurred while scraping notifications ')
        finally:
            return notifications

    def generate_notifications(self, courses):
        notifications = []
        session = requests.Session()
        intranet = session.post(self.link + '/login',
                                headers=self.headers,
                                data=self.payload)
        soup = BeautifulSoup(intranet.text, self.html_parser)
        check_element = soup.find('li', {'class': 'home-page'})
        if check_element is None or 'Intranet' not in check_element.text:
            raise errors.LoginError
        self.progress_queue_manager.init_phase()
        for i, course in enumerate(courses):
            time.sleep(2)
            self.progress_queue_manager.scraping_course_start_info(course)
            link = course.url + '/obavijesti'
            raw_html = session.get('https://' + link,
                                   headers=self.headers,
                                   data=self.payload).text
            soup = BeautifulSoup(raw_html, self.html_parser)
            news_articles = soup.findAll('div', {'class': 'news_article'})
            for j, news_article in enumerate(news_articles):
                title_element = news_article.find('div',
                                                  {'class': 'news_title'})
                notification = Notification()
                notification.link = self.link + title_element.a['href']
                notification.title = title_element.get_text().strip()
                notification.site = self.database.select(Course,
                                                         name=course.name).id
                notification.author = self._check_author(
                    news_article.find('span', {
                        'class': 'author_name'
                    }).get_text().strip()).id
                notification.publish_date = self._check_date(
                    news_article.findAll('time'))
                notification.text = self._check_text(
                    str(news_article.find('div',
                                          {'class': 'news_lead'})).replace(
                                              '<p>', '').replace('</p>', '\n'))
                notifications.append(notification)
                self.progress_queue_manager.scrape_phase(
                    course, i, j, len(courses), len(news_articles))
            self.progress_queue_manager.scraping_course_done_info(
                course, i, len(courses))
        session.get(self.link + '/login/Logout?logout=1')
        return notifications

    def _check_author(self, author_string: str) -> Author:
        author_name_list = author_string.split()
        author = self.database.select(Author,
                                      first_name=author_name_list[0],
                                      last_name=' '.join(author_name_list[1:]))
        if author is None:
            self.database.insert(
                Author(author_name_list[0], ' '.join(author_name_list[1:])))
            author = self.database.select(Author,
                                          first_name=author_name_list[0],
                                          last_name=' '.join(
                                              author_name_list[1:]))
        return author

    def _check_date(self, date_elements: list):
        date = None
        for date_element in date_elements:
            element = date_element['datetime']
            if len(element) == 16:
                iteration_date = datetime.strptime(element,
                                                   self.date_format['small'])
            elif len(element) == 18:
                iteration_date = datetime.strptime(element,
                                                   self.date_format['medium'])
            else:
                iteration_date = datetime.strptime(element,
                                                   self.date_format['large'])
            if date is None:
                date = iteration_date
            elif date < iteration_date:
                # if date was edited get most recent date
                date = iteration_date
        return date

    # https://api.slack.com/reference/surfaces/formatting
    def _check_text(self, text: str) -> str:
        # getting only text from html
        text = '\n'.join(text.split('\n')[1:-1]).strip()
        # replacing HTML spacing
        text = text.replace(u"\u00A0", " ")
        # closing right gaps
        text = re.sub(r'([0-9A-Za-z:;,"\').]) +(</)', r'\g<1>\g<2>', text)
        # closing left gaps
        text = re.sub(r'(<[^>]+>) +([^ ])', r'\g<1>\g<2>', text)
        # expanding right gaps
        text = re.sub(r'(</[^>]+>)([^ :;,"\').])', r'\g<1> \g<2>', text)
        # expanding left gaps
        text = re.sub(r'([^ :;"\'(.])(<[^/>]+>)', r'\g<1> \g<2>', text)
        # replacing bold and italic html tags for slack tags
        text = text.replace('<strong>', '*').replace('</strong>', '*').replace(
            '<em>', '_').replace('</em>', '_')
        # parsing links
        link_groups: list[LinkHelper] = LinkHelper.create_list(
            re.findall(r'(<a href=\"([^<> ]*)\"[^<>]*>([^<>]*)</a>)', text))
        for link_group in link_groups:
            if 'http' in link_group.target:
                text = re.sub(
                    re.escape(link_group.target),
                    '&lt;' + link_group.link + '|' + link_group.text + '&gt;',
                    text)
            else:
                mail_elements = re.search(
                    r"javascript:cms_mail\(\'([^,]*)\',\'([^,]*)\'.*\)",
                    link_group.link)
                text = re.sub(
                    re.escape(link_group.target),
                    '&lt;mailto:' + mail_elements.group(1) + '@' +
                    mail_elements.group(2) + '|' + link_group.text + '&gt;',
                    text)
        # workaround for Slack not parsing list as expected
        text = text.replace('<li>', '<li>• ')
        # removing extra spaces
        text = re.sub(r' +', r' ', text)
        # parsing the rest of text
        return BeautifulSoup(text, self.html_parser).get_text()
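Tying the class together, a minimal usage sketch (the link and credentials are illustrative placeholders; the course query mirrors the one in code example #12):

payload = {'username': 'user', 'password': 'pass'}
scraper = WebScraper('https://www.fer.unizg.hr', payload)
courses = DataBase().select_many(Course, watch=True)
notifications = scraper.start(courses)  # [] on scrape errors, None on login failure
for notification in notifications or []:
    print(notification.title, notification.publish_date)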