コード例 #1
 def get_teilar(self):
     """
     Grab announcements from teilar.gr and write one custom RSS file per feed.

     The following pages are scraped:
     - http://teilar.gr/news.php?cid=1
     - http://teilar.gr/news.php?cid=2
     - http://teilar.gr/news.php?cid=5
     - http://teilar.gr/news.php?cid=6
     - http://teilar.gr/tmimatanews.php

     Integer keys of ``rss_filenames`` are ``cid`` values of news.php; the
     string key is a page path relative to the site root. Only the first ten
     announcement links of each listing page are followed.
     """
     rss_filenames = {
         1: 'general.rss',
         2: 'teilar_ann.rss',
         5: 'council.rss',
         6: 'committee.rss',
         'tmimatanews.php': 'departments.rss',
     }
     for cid, rss_name in rss_filenames.iteritems():
         custom_rss = self.initialize_rss_file()
         is_category = isinstance(cid, int)
         if is_category:
             output = teilar_anon_login('http://www.teilar.gr/news.php?cid=%s' % cid)
         else:
             output = teilar_anon_login('http://www.teilar.gr/%s' % cid)
         soup = BeautifulSoup(output)
         try:
             # The announcement links live in the 18th <table> of the page
             announcements_all = soup.find_all('table')[17].find_all('a', 'BlackText11')[:10]
         except Exception as error:
             logger_syslog.error(error, extra=log_extra_data())
             # Without the listing there is nothing to add; skip this feed
             # instead of reusing the links of the previous one.
             continue
         for item in announcements_all:
             # Get inside the announcement to get the rest of the info
             ann_url = 'news_detail.php?nid=' + item['href'].split('nid=')[1]
             if not is_category:
                 ann_url = 'tmimata/' + ann_url
             full_url = 'http://teilar.gr/' + ann_url
             output = teilar_anon_login('http://www.teilar.gr/%s' % ann_url)
             soup = BeautifulSoup(output)
             try:
                 if not is_category:
                     # The replace keeps the name in line with the long
                     # spelling used elsewhere in this file
                     creator = soup.find('span', 'OraTextBold').contents[0].split(' >')[0].replace(u'Τεχν.', u'Τεχνολογίας')
                 else:
                     creator = None
                 temp_td_oratext = soup.find_all('td', 'OraText')
                 # The date is split on '/' as day/month/year
                 pubdate = temp_td_oratext[0].contents[0].split('/')
                 pubdate = date(int(pubdate[2]), int(pubdate[1]), int(pubdate[0]))
                 title = temp_td_oratext[1].contents[0]
                 description = unicode(soup.find('td', 'BlackText11'))
                 enclosure = self.get_enclosure(soup)
             except Exception as error:
                 logger_syslog.error(error, extra=log_extra_data(full_url))
                 # Do not publish an item built from missing or stale fields
                 continue
             self.add_rss_item(custom_rss, title, full_url, pubdate, description, creator, enclosure)
         self.write_rss_file(custom_rss, rss_name)
コード例 #2
 def get_teachers(self):
     """
     Retrieve the teachers from teilar.gr.

     Profile pages ``person.php?pid=N`` are requested for N in 0..399.
     Pages that do not contain a teacher name are skipped.

     Returns a dictionary with the following structure:
     teachers_from_teilar = {'url': ['name', 'email', 'department']}
     ``email`` and ``department`` stay None when they cannot be found.
     """
     teachers_from_teilar = {}
     for pid in range(400):
         if pid == 386:
             # Dirty workaround to avoid a teacher who has no matching
             # department, probably because they are testing something
             continue
         # Perform connections to each of the teacher's profile page. From
         # the HTML output we grab the name, email and department
         url = 'http://www.teilar.gr/person.php?pid=%s' % pid
         output = teilar_anon_login(url)
         soup = BeautifulSoup(output)
         name = None
         email = None
         department = None
         try:
             name = soup.findAll('td', 'BlackText11Bold')[1].contents[0].strip()
         except IndexError:
             # No teacher found, continue with the next item of the loop
             continue
         # Both the email and the department are read from these cells
         cells = soup.findAll('td', 'BlackText11')
         try:
             email = cells[5].a.contents[0].split(' ')[0].strip()
         except AttributeError:
             # The cell has no <a> child; fall back to its own first child
             try:
                 email = cells[5].contents[0].strip()
             except IndexError:
                 pass
         except IndexError:
             pass
         try:
             # The string replace in the end is to keep it in track with
             # dionysos.teilar.gr
             department = cells[2].contents[0].strip().replace(u'Τεχν.', u'Τεχνολογίας')
         except IndexError:
             pass
         teachers_from_teilar[url] = [name, email, department]
     return teachers_from_teilar
コード例 #3
 def get_faculties(self):
     """
     Retrieve the faculties from the openclass.teilar.gr faculty listing.

     Returns a dictionary with the following structure:
     faculties_from_eclass = {'url': ['name', 'code']}

     Every <td> of the first table on the page is expected to hold an <a>
     (link and name) and a <small> (code in parentheses); a cell without
     them raises AttributeError.
     """
     faculties_from_eclass = {}
     output = teilar_anon_login('http://openclass.teilar.gr/modules/auth/listfaculte.php')
     soup = BeautifulSoup(output)
     # Calling a tag is BeautifulSoup shorthand for find_all()
     all_faculties = soup.table('td')
     for faculty in all_faculties:
         url = 'http://openclass.teilar.gr/modules/auth/' + faculty.a.get('href')
         name = faculty.a.contents[0].strip()
         # Keep the text before the first ')' and drop the '(' characters
         code = faculty.small.contents[0].split(')')[0].replace('(', '').strip()
         faculties_from_eclass[url] = [name, code]
     return faculties_from_eclass
コード例 #4
 def get_departments(self):
     """
     Retrieve the departments from teilar.gr/schools.php.

     Returns a dictionary with the following structure:
     departments_from_teilar = {'url': 'name'}
     """
     departments_from_teilar = {}
     output = teilar_anon_login("http://www.teilar.gr/schools.php")
     soup = BeautifulSoup(output)
     # Department links are the <a> tags carrying the "BlueText" CSS class
     all_departments = soup.find_all("a", "BlueText")
     for department in all_departments:
         url = "http://www.teilar.gr/" + department.get("href")
         # The string replace in the end is to keep it in track with
         # dionysos.teilar.gr
         name = department.contents[0].replace(u"Τεχν.", u"Τεχνολογίας")
         departments_from_teilar[url] = name
     return departments_from_teilar
コード例 #5
 def get_lessons(self, faculties_from_db_q):
     """
     Retrieve the lessons of every given faculty from openclass.teilar.gr.

     ``faculties_from_db_q`` is an iterable of objects exposing ``url`` and
     ``name`` attributes.

     Returns a dictionary with the following structure:
     lessons_from_eclass = {'url': ['name', 'teacher', 'faculty', 'ltype']}
     """
     # EclassLessons are grouped in three types: Undergraduate, Graduate,
     # Other. The label at index N belongs to the table with id "tN".
     lesson_types = (u'Προπτυχιακό', u'Μεταπτυχιακό', u'Άλλο')
     lessons_from_eclass = {}
     for faculty in faculties_from_db_q:
         output = teilar_anon_login(faculty.url)
         soup = BeautifulSoup(output)
         for i, ltype in enumerate(lesson_types):
             tables = soup.find_all('table', id='t%s' % i)
             if not tables:
                 # If the lesson type does not exist, then move on
                 continue
             # Collect the rows straight from the matched tables; no need to
             # serialise them and parse the markup a second time.
             all_lessons = []
             for table in tables:
                 all_lessons.extend(table.find_all('tr', 'even'))
                 all_lessons.extend(table.find_all('tr', 'odd'))
             for lesson in all_lessons:
                 # Drop the first and last character of the <small> text to
                 # get the course code used in the URL
                 url = lesson.small.contents[0][1:-1]
                 url = u'http://openclass.teilar.gr/courses/%s/' % url
                 cells = lesson.find_all('td')
                 try:
                     name = lesson.a.contents[0].strip()
                 except AttributeError:
                     # Row without a link: take the name from the second cell
                     name = cells[1].contents[0].strip()
                 try:
                     teacher = cells[2].contents[0].strip()
                 except IndexError:
                     teacher = None
                 # NOTE(review): unicode(None) stores the text u'None' for a
                 # missing teacher; kept as-is because callers may compare
                 # against previously stored values - confirm.
                 lessons_from_eclass[url] = [unicode(name), unicode(teacher), faculty.name, ltype]
     return lessons_from_eclass
コード例 #6
def library(request):
    """
    Perform a search in the library catalogue and render the results.

    On a GET request with a valid form, the ``search`` query parameter is
    sent to hermes.lib.teilar.gr and the answer is scraped into a list of
    ``[title, authors, editor, city]`` entries. The template receives the
    form, a ``notification`` dict (with an ``info`` message when nothing was
    found) and the ``results`` list.
    """
    try:
        from urllib import quote  # Python 2
    except ImportError:
        from urllib.parse import quote  # Python 3

    notification = {}
    results = []
    if request.method == 'GET':
        form = LibraryForm(request.GET)
        if form.is_valid():
            # str() on a non-ASCII term raises UnicodeEncodeError on
            # Python 2, and an unescaped term could inject extra query
            # parameters, so encode and percent-quote it.
            # NOTE(review): assumes the catalogue accepts UTF-8 - confirm.
            term = request.GET.get('search') or u''
            if not isinstance(term, bytes):
                term = term.encode('utf-8')
            url = 'http://hermes.lib.teilar.gr/ipac20/ipac.jsp?session=A26772NR74250.24315&menu=search&aspect=subtab22&npp=40&ipp=20&spp=20&profile=multbl--1&ri=&term=%s&index=.GEN&x=0&y=0&aspect=subtab22' % quote(term, safe='')
            output = teilar_anon_login(url, request)
            tables = BeautifulSoup(output).find_all('table')
            # The results are read from the 25th table; a page with fewer
            # tables is treated as "no results" instead of raising.
            if len(tables) > 24:
                soup = tables[24]
                temp_td = soup.find_all('td')
                for position, item in enumerate(soup.find_all('a', 'mediumBoldAnchor')):
                    # The cells of the first result start at index 5 and
                    # every following result is 10 cells further on
                    i = 5 + 10 * position
                    title = item.contents[0]
                    # The authors are in <i> tags. Take the contents of
                    # each one and join them with commas for prettier output
                    authors = ', '.join(
                        author.contents[0].replace(',', '').strip()
                        for author in temp_td[i].find_all('i')
                    )
                    # The cell text has the form "<city> : <editor>"
                    publisher = temp_td[i + 1].contents[0].contents[0].split(' : ')
                    city = publisher[0]
                    editor = publisher[1] if len(publisher) > 1 else ''
                    results.append([title, authors, editor, city])
            if not results:
                notification['info'] = 'Δεν υπάρχουν αποτελέσματα'
    else:
        # NOTE(review): the GET branch uses LibraryForm while this one uses
        # SearchForm - confirm this is intended.
        form = SearchForm()
    return render_to_response('library.html', {
            'form': form,
            'notification': notification,
            'results': results,
        }, context_instance=RequestContext(request))
コード例 #7
 def get_teachers(self):
     """
     Grab announcements from all the teachers, and put them in
     a custom RSS file (teachers.rss).

     Nothing is written when the combined announcements page cannot be
     parsed. A teacher page or a single announcement that cannot be parsed
     is logged and skipped.
     """
     custom_rss = self.initialize_rss_file()
     output = teilar_anon_login('http://www.teilar.gr/profannnews.php')
     soup = BeautifulSoup(output)
     try:
         announcements_all = soup.find_all('a', 'BlackText11')
     except Exception as error:
         logger_syslog.error(error, extra=log_extra_data())
         # Nothing to publish; leave any existing RSS file untouched
         return
     # The teacher's announcements are all under one page instead
     # of being each one in separate page. We count in the combined
     # page how many times a teacher's link is mentioned, and we
     # parse the same number of the teacher's top announcements.
     # The results are kept in a dictionary with the following structure:
     # authors = {'url': number_of_announcements}
     authors = {}
     for item in announcements_all:
         url = item['href']
         authors[url] = authors.get(url, 0) + 1
     for url, number in authors.iteritems():
         # Get inside the teacher's page which contains all the announcements
         output = teilar_anon_login('http://www.teilar.gr/%s' % url)
         soup = BeautifulSoup(output)
         try:
             author_name = soup.find('td', 'BlueTextBold').i.contents[0]
         except Exception as error:
             logger_syslog.error(error, extra=log_extra_data(url))
             # Do not attribute items to the previous teacher's name
             continue
         try:
             # Select only the number of announcements we want
             announcements_all = soup.find_all('td', 'LineDownDots')[0:number]
         except Exception as error:
             logger_syslog.error(error, extra=log_extra_data(url))
             continue
         for announcement in announcements_all:
             # Parse data from each announcement
             try:
                 temp_td_blacktext11 = announcement.find_all('td', 'BlackText11')
                 title = temp_td_blacktext11[0].b.contents[0]
                 # The date is split on '/' as day/month/year
                 pubdate = announcement.find('td', 'OraText').contents[0].split('/')
                 pubdate = date(int(pubdate[2]), int(pubdate[1]), int(pubdate[0]))
                 description = temp_td_blacktext11[1]
                 # NOTE(review): the enclosure is looked up in the whole
                 # page, not in this announcement - confirm this is intended.
                 enclosure = self.get_enclosure(soup)
             except Exception as error:
                 logger_syslog.error(error, extra=log_extra_data(author_name))
                 # Do not publish an item built from missing or stale fields
                 continue
             self.add_rss_item(custom_rss, title, 'http://teilar.gr/' + url, pubdate, description, author_name, enclosure)
     self.write_rss_file(custom_rss, 'teachers.rss')