Example no. 1
import requests
from bs4 import BeautifulSoup

def scrape_adsu():
    """Get information about the ADSU; the page's messy markup forces an ugly workaround"""

    scraped_info = {}
    adsu_url = "http://www.adsuaq.org/"
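    # Browser-like request headers, presumably so the site serves the normal
    # desktop page instead of rejecting an unfamiliar client.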
    headers = {
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5)",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "accept-charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
        "accept-encoding": "gzip,deflate,sdch",
        "accept-language": "en-US,en;q=0.8",
    }

    request = requests.get(adsu_url, headers=headers)

    if request.status_code != 200:
        print("Error! Status "+request.status_code)
        return

    info = BeautifulSoup(request.text, "html.parser").find(id="AutoNumber5").text.replace("  ", "")\
           .replace("\t", "").replace("\r", "").replace("\n\n", "")

    scraped_info.update({
        "info": info
    })

    utils.write_json(scraped_info, "../json/adsu.json")
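Nearly every example here persists its result through utils.write_json, which this listing never shows. A minimal sketch of what such a helper presumably looks like, with the signature inferred from the calls (data first, then the destination path):

import json

def write_json(data, path):
    """Hypothetical helper: dump data to path as UTF-8 JSON."""
    with open(path, "w", encoding="utf-8") as out_file:
        json.dump(data, out_file, ensure_ascii=False, indent=4)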
Example no. 2
def newson_command(bot, update):
    """Defining the command to enable notifications for news"""

    if update.message.chat_id not in utils.SUBSCRIBERS:
        utils.SUBSCRIBERS.append(update.message.chat_id)
        bot.sendMessage(update.message.chat_id, text='Notifiche Abilitate!')
        utils.write_json(utils.SUBSCRIBERS, "json/subscribers.json")
    else:
        bot.sendMessage(update.message.chat_id, text='Le notifiche sono già abilitate!')
Example no. 3
def newson_command(bot, update):
    """Defining the command to enable notifications for news"""

    if update.message.chat_id not in utils.SUBSCRIBERS:
        utils.SUBSCRIBERS.append(update.message.chat_id)
        bot.sendMessage(update.message.chat_id, text='Notifiche Abilitate!')
        utils.write_json(utils.SUBSCRIBERS, "json/subscribers.json")
    else:
        bot.sendMessage(update.message.chat_id, text='Le notifiche sono già abilitate!')
Example no. 4
def newsoff_command(bot, update):
    """Defining the command to disable notifications for news"""

    if update.message.chat_id in utils.SUBSCRIBERS:
        utils.SUBSCRIBERS.remove(update.message.chat_id)
        bot.sendMessage(update.message.chat_id, text='Notifiche Disattivate!')
        utils.write_json(utils.SUBSCRIBERS, "json/subscribers.json")
    else:
        bot.sendMessage(update.message.chat_id,
                        text='Per disattivare le notifiche dovresti prima attivarle.')
Example no. 5
def newsoff_command(bot, update):
    """Defining the command to disable notifications for news"""

    if update.message.chat_id in utils.SUBSCRIBERS:
        utils.SUBSCRIBERS.remove(update.message.chat_id)
        bot.sendMessage(update.message.chat_id, text='Notifiche Disattivate!')
        utils.write_json(utils.SUBSCRIBERS, "json/subscribers.json")
    else:
        bot.sendMessage(update.message.chat_id,
                        text='Per disattivare le notifiche dovresti prima attivarle.')
Example no. 6
import requests
from bs4 import BeautifulSoup

def scrape_professors():
    """Get information about professors"""

    scraped_professors = []
    professors_url = "http://www.disim.univaq.it/didattica/" \
                     "content.php?tipo=3&ordine=1&chiave=0&pid=25&did=8&lid=it&" \
                     "frmRicercaNome=&frmRicercaCognome=&frmRicercaLaurea=1&action_search=Filtra"
    headers = {
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5)",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "accept-charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
        "accept-encoding": "gzip,deflate,sdch",
        "accept-language": "en-US,en;q=0.8",
    }

    request = requests.get(professors_url, headers=headers)

    if request.status_code != 200:
        print("Error! Status "+request.status_code)
        return

    professors_table = BeautifulSoup(request.text, "html.parser").find("table")

    firsts_td = professors_table.find_all(colspan='2')
    for name_cell in firsts_td:
        name = name_cell.find("a").text
        phone = name_cell.find_next_sibling().text
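        # The page masks each address with "dot" and "at" images to deter
        # spam bots; undo that by mapping every image back to its character.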
        email = str(name_cell.find_next_sibling().find_next_sibling().a) \
                .replace('<a href="#">', '').replace('</a>', '') \
                .replace('<img alt="dot" height="2" src="img/dot.gif" width="3"/>', '.') \
                .replace('<img alt="at" height="10" src="img/at.gif" width="12"/>', '@')
        courses = name_cell.find_next_sibling().find_next_sibling().find_next_sibling() \
                  .text.replace('\n', '').replace('\u00a0', '').replace('[F3I]', '') \
                  .replace('[F4I]', '').replace('[F3M]', '').replace('[I3N]', '') \
                  .replace('[I4T]', '')

        scraped_professors.append({
            "nome": name if name != "" else "non disponibile",
            "telefono": phone if phone != "" else "non disponibile",
            "e-mail": email if email != "" else "non disponibile",
            "corsi": courses if courses != "" else "non disponibile",
            "ufficio": "0"
        })

    utils.write_json(scraped_professors, "../json/professors.json")
Example no. 7
def notify_news(bot):
    """Defining method that will be repeated over and over"""
    unread_news = news.check_news()

    if len(unread_news) > 0:
        data = news.pull_news(10)
        utils.write_json(data, "json/news.json")
        new_news_string = ""
        for item in unread_news:
            truncated_descr = item['description'][:75] + '...' if len(item['description']) > 75\
                              else item['description']
            new_news_string += "- [" + item['title'] + "](" + item['link'] + ")\n" \
                              + truncated_descr + "\n"

        for chat_id in utils.SUBSCRIBERS:
            bot.sendMessage(chat_id, parse_mode='Markdown', text=new_news_string)

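    # Re-enqueue this function so it runs again after 40 units (presumably seconds).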
    JOB_QUEUE.put(notify_news, 40, repeat=True)
Example no. 8
def notify_news(bot):
    """Defining method that will be repeated over and over"""
    unread_news = news.check_news()

    if len(unread_news) > 0:
        data = news.pull_news(10)
        utils.write_json(data, "json/news.json")
        new_news_string = ""
        for item in unread_news:
            truncated_descr = item['description'][:75] + '...' if len(item['description']) > 75\
                              else item['description']
            new_news_string += "- [" + item['title'] + "](" + item['link'] + ")\n" \
                              + truncated_descr + "\n"

        for chat_id in utils.SUBSCRIBERS:
            bot.sendMessage(chat_id,
                            parse_mode='Markdown',
                            text=new_news_string)

    JOB_QUEUE.put(notify_news, 40, repeat=True)
Example no. 9
import requests
from bs4 import BeautifulSoup

def scrape_student_office():
    """Get info about the student service office"""

    scraped_info = {}
    student_office_url = "http://www.univaq.it/section.php?id=607"
    headers = {
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5)",
        "accept":
        "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "accept-charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
        "accept-encoding": "gzip,deflate,sdch",
        "accept-language": "en-US,en;q=0.8",
    }

    request = requests.get(student_office_url, headers=headers)

    if request.status_code != 200:
        print("Error! Status " + request.status_code)
        return

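    # The office details sit in consecutive table rows after the
    # "AREA SCIENTIFICA" heading: address, then phone, e-mail, and hours.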
    first_row = BeautifulSoup(request.text, "html.parser").find(string="AREA SCIENTIFICA")\
                .parent.parent.find_next_sibling().find("tr")

    address = first_row.find(class_="address_table_description").text
    phone = first_row.find_next_sibling().find(
        class_="address_table_description").text
    email = first_row.find_next_sibling().find_next_sibling()\
            .find(class_="address_table_description").text
    hours = first_row.find_next_sibling().find_next_sibling().find_next_sibling()\
            .find(class_="address_table_description").text.replace('\n', '')\
            .replace("13", "13, ")

    scraped_info.update({
        "indirizzo": address,
        "telefono": phone,
        "e-mail": email,
        "orari": hours
    })

    utils.write_json(scraped_info, "../json/student_office.json")
Example no. 10
from telegram.error import TelegramError

def notify_news(bot):
    """Periodic job: push any unread news to every subscriber"""
    unread_news = news.check_news()
    invalid_chatid = list()

    if unread_news:
        data = news.pull_news(10)
        news_to_string = ""
        utils.write_json(data, "json/news.json")

        for item in unread_news:
            news_to_string += "- [{title}]({link})\n{description}\n".format(**item)

        for chat_id in utils.SUBSCRIBERS:
            try:
                bot.sendMessage(chat_id, parse_mode='Markdown', text=news_to_string)
            except TelegramError:
                invalid_chatid.append(chat_id)

        for chat_id in invalid_chatid:
            utils.SUBSCRIBERS.remove(chat_id)
            utils.write_json(utils.SUBSCRIBERS, "json/subscribers.json")
Example no. 11
def scrape_student_office():
    """Get info about the student service office"""

    scraped_info = {}
    student_office_url = "http://www.univaq.it/section.php?id=607"
    headers = {
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5)",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "accept-charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
        "accept-encoding": "gzip,deflate,sdch",
        "accept-language": "en-US,en;q=0.8",
    }

    request = requests.get(student_office_url, headers=headers)

    if request.status_code != 200:
        print("Error! Status "+request.status_code)
        return

    first_row = BeautifulSoup(request.text, "html.parser").find(string="AREA SCIENTIFICA")\
                .parent.parent.find_next_sibling().find("tr")

    address = first_row.find(class_="address_table_description").text
    phone = first_row.find_next_sibling().find(class_="address_table_description").text
    email = first_row.find_next_sibling().find_next_sibling()\
            .find(class_="address_table_description").text
    hours = first_row.find_next_sibling().find_next_sibling().find_next_sibling()\
            .find(class_="address_table_description").text.replace('\n', '')\
            .replace("13", "13, ")

    scraped_info.update({
        "indirizzo": address,
        "telefono": phone,
        "e-mail": email,
        "orari": hours
    })

    utils.write_json(scraped_info, "../json/student_office.json")
Example no. 12
PROFESSORS_URL = ("http://www.disim.univaq.it/didattica/"
                  "content.php?tipo=3&ordine=1&chiave=0&pid=25&did=8&lid=it&"
                  "frmRicercaNome=&frmRicercaCognome=&frmRicercaLaurea=1&action_search=Filtra")

def phone_cleanup(s):
    """Clean the phones' output"""
    if not s:
        return ''
    s = ''.join([c for c in s if c.isdigit() or c == '+'])
    if s and s[0] != '+' and len(s) == 10:
        s = '+39' + s  # if not already internationalized, make it Italian
    return '-'.join([s[:3], s[3:7], s[7:]]) if s.startswith('+39') else s


def scrape_professors(url=PROFESSORS_URL):
    """Get information about professors"""

    scraped_professors = []
    soup = utils.get_soup_from_url(url)
    professor_names = soup.find("table").find_all(colspan='2')
    for name_cell in professor_names:
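        # Each professor row holds five <td> cells: name, phone, e-mail,
        # courses, and a trailing cell this scraper ignores.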
        name, phone, email, courses, _ = name_cell.parent.find_all('td')
        scraped_professors.append({
            "nome": name.text or "non disponibile",
            "telefono": phone_cleanup(phone.text) or "non disponibile",
            "e-mail": email_soup_cleanup(email) or "non disponibile",
            "corsi": courses_cleanup(courses.text) or "non disponibile",
        })
    return scraped_professors


if __name__ == "__main__":
    utils.write_json(scrape_professors(), "../json/professors.json")
Example no. 13
def email_soup_cleanup(email_soup):
    """Clean the emails' output, turning the anti-spam images back into characters"""
    for img in email_soup.find_all('img', alt='at'):
        img.replace_with('@')
    for img in email_soup.find_all('img', alt='dot'):
        img.replace_with('.')
    return email_soup.text.strip()  # .lower()  # ?

def phone_cleanup(s):
    """Clean the phones' output"""
    if not s:
        return ''
    s = ''.join([c for c in s if c.isdigit() or c == '+'])
    if s and s[0] != '+' and len(s) == 10:
        s = '+39' + s  # if not already internationalized, make it Italian
    return '-'.join([s[:3], s[3:7], s[7:]]) if s.startswith('+39') else s

def scrape_professors(url=PROFESSORS_URL):
    """Get information about professors"""

    scraped_professors = []
    soup = utils.get_soup_from_url(url)
    professor_names = soup.find("table").find_all(colspan='2')
    for name_cell in professor_names:
        name, phone, email, courses, _ = name_cell.parent.find_all('td')
        scraped_professors.append({
            "nome": name.text or "non disponibile",
            "telefono": phone_cleanup(phone.text) or "non disponibile",
            "e-mail": email_soup_cleanup(email) or "non disponibile",
            "corsi": courses_cleanup(courses.text) or "non disponibile",
        })
    return scraped_professors

if __name__ == "__main__":
    utils.write_json(scrape_professors(), "../json/professors.json")
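A quick check of phone_cleanup on a hypothetical ten-digit Italian landline number shows the normalization it performs:

    >>> phone_cleanup('0862 433057')
    '+39-0862-433057'
    >>> phone_cleanup('+390862433057')
    '+39-0862-433057'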
Example no. 14
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""This script scrapes information about the student service office from the univaq website."""

import sys
sys.path.insert(0, '../')
from libs.utils import utils

STUDENT_OFFICE_URL = "http://www.univaq.it/section.php?id=607"

def scrape_student_office(url=STUDENT_OFFICE_URL):
    """Get info about the student service office"""

    soup = utils.get_soup_from_url(url)
    area = soup.find(string='AREA SCIENTIFICA').parent.parent.find_next_sibling()
    address, phone, email, hours = area.find_all(class_='address_table_description')
    return { 'indirizzo': address.text,
             'telefono': phone.text,
             'e-mail': email.text,
             'orari': hours.text.strip().replace('13', '13, ') }

if __name__ == "__main__":
    utils.write_json(scrape_student_office(), "../json/student_office.json")
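Examples 11 through 17 fetch pages through utils.get_soup_from_url, which is also not shown in this listing. A minimal sketch of such a helper, assuming it simply wraps the requests/BeautifulSoup boilerplate from the earlier examples:

import requests
from bs4 import BeautifulSoup

def get_soup_from_url(url):
    """Hypothetical helper: fetch url and return the parsed page."""
    response = requests.get(url, headers={"user-agent": "Mozilla/5.0"})
    response.raise_for_status()  # surface HTTP errors instead of parsing an error page
    return BeautifulSoup(response.text, "html.parser")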
Example no. 15
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""This script scrapes all the info about the adsu of the university's city."""

import sys
sys.path.insert(0, '../')
from libs.utils import utils

ADSU_URL = "http://www.adsuaq.org/"

def scrape_adsu(url=ADSU_URL):
    """Get information about the adsu in a crazy way due to their bitching page made like shit"""

    soup = utils.get_soup_from_url(url).find(id="AutoNumber5")
    info = soup.text.replace("  ", "").replace("\t", "").replace("\r", "").replace("\n\n", "")
    return {"info": info}

if __name__ == "__main__":
    utils.write_json(scrape_adsu(), "../json/adsu.json")
Example no. 16
import os

def create_news_json():
    """Check that json/news.json exists, creating it if missing"""

    if not os.path.isfile("json/news.json"):
        utils.write_json(pull_news(10), "json/news.json")
Example no. 17
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""This script scrapes all the info about the adsu of the university's city."""

import sys
sys.path.insert(0, '../')
from libs.utils import utils

ADSU_URL = "http://www.adsuaq.org/"


def scrape_adsu(url=ADSU_URL):
    """Get information about the adsu in a crazy way due to their bitching page made like shit"""

    soup = utils.get_soup_from_url(url).find(id="AutoNumber5")
    info = soup.text.replace("  ",
                             "").replace("\t",
                                         "").replace("\r",
                                                     "").replace("\n\n", "")
    return {"info": info}


if __name__ == "__main__":
    utils.write_json(scrape_adsu(), "../json/adsu.json")