def _announce(message, action):
    """Print *message*, run *action*, then confirm with DONE.

    Factored out of start() to remove seven copies of the same
    print/call/print sequence; output is byte-identical to the
    original inline version.
    """
    print(message, end=" ")
    action()
    print("DONE")


def start():
    """Start of the program: bring every subsystem up in order.

    Order matters — config first, then the database the other
    services persist to, then auth/scraper/library, the optional
    ripper, and finally the web front end.
    """
    _announce("LOADING Config...", CONFIG.load)
    _announce("SETTING UP DATABASE...", Database.setup)
    _announce("LOADING AUTHENTICATION...", Auth.start)
    _announce("LOADING SCRAPER...", Scraper.start)
    _announce("LOADING LIBRARIES...", Library.start)
    # Ripper is optional; only start it when enabled in config.
    if Ripper.enabled:
        _announce("STARTING RIPPER...", Ripper.start)
    _announce("STARTING WEBSERVICES...", Webserver.start)
    print("TACKEM HAS STARTED")
def run():
    """Load config, crawl today's page, and mail the result on success."""
    CONFIG.load()
    today = Tools.get_date()
    # send_mail only fires when the crawl actually produced output
    if Crawler().start(today):
        Mail().send_mail(today)
def _parser(self, html):
    """Pull the 'daily' section out of *html* and return it as a list of
    HTML fragments (date header followed by one title/subject pair per post).

    NOTE(review): assumes *html* is a BeautifulSoup tree and that
    self.html_encode is accepted by Tag.prettify — confirm against caller.
    """
    section = html.find(class_='daily')
    date_tag = section.find(class_='date')
    fragments = [date_tag.prettify(self.html_encode)]
    for entry in section.find_all(class_='post'):
        title_link = entry.find(class_='title').a
        subject_tag = entry.find(class_='subject-name')
        # Rewrite relative subject links into absolute URLs.
        href = subject_tag.a['href']
        if not href.startswith(self.base_url):
            subject_tag.a['href'] = self.base_url + href
        fragments.append('<br />')
        fragments.append(title_link.prettify(self.html_encode))
        fragments.append(subject_tag.prettify(self.html_encode))
    return fragments

def _save(self, date, data):
    """Join the *data* fragments and write them to '<self.path><date>.html'."""
    target = self.path + date + '.html'
    with open(target, 'w') as out_file:
        out_file.write(''.join(data))

if __name__ == '__main__':
    CONFIG.load()
    Crawler().start(Tools.get_date())