def awake(self, transaction):
    """Prepare the servlet for a request that is answered as CSS.

    Runs the base ``WebKit.Page.Page.awake`` first, then stores the
    transaction's response and request on the instance, sets the response
    ``Content-Type`` to ``text/css`` and wraps the response and the session
    with ``Page.OutputWrapper`` / ``Page.SessionWrapper``.
    """
    WebKit.Page.Page.awake(self, transaction)

    # Response side: remember it, tag it as CSS, wrap it for output.
    self.response = transaction.response()
    response = self.response
    response.setHeader('Content-Type', 'text/css')

    self.request = transaction.request()
    self.out = Page.OutputWrapper(response)
    self.session = Page.SessionWrapper(transaction.session())
def test_page(page_name):
    """Check if given Page*.py file is working properly"""
    name = os.path.splitext(os.path.basename(page_name))[0]

    # 1: a body_id handed to the page must appear in the rendered output.
    marker = 'phony'
    rendered = Page.Default(body_id=marker).Render()
    assert marker in rendered
    print('#1 %s (data) OK' % name)

    # 2: a page built without arguments must still render something.
    rendered = Page.Default().Render()
    assert len(rendered)
    print('#2 %s () OK' % name)

    # 3: list every published route next to the callable behind it.
    routes = get_server()._web_paths
    print('#3 %s rutas publicadas' % len(routes))
    for route in routes:
        url = route._regex[1:]
        try:
            func = str(route._Publish_FakeClass__func).split()[1]
        except AttributeError:
            # No wrapped function on this entry: derive the name from the
            # repr of its bound __init__ instead.
            bound_init = str(route.__init__)
            func = bound_init.split('.__init')[0].split('bound method ')[1]
        print("%24s --> %s" % (func, url))
def ajout_page(self):
    """Add one more page, as long as fewer than ten exist.

    The new ``Page`` is registered in ``self.pages_frais`` under the label
    ``"Page N"``, built with ``creer_page()``, remembered in
    ``self.collec_pages`` and finally selected. Nothing happens once
    ``self.num_pages`` has reached 10.
    """
    if self.num_pages >= 10:
        return

    nouvelle = Page(self.pages_frais, self.num_pages)
    self.num_pages += 1
    # The label is 1-based, hence it is built after the increment.
    self.pages_frais.add(nouvelle.page, text="Page %d" % self.num_pages)
    nouvelle.creer_page()
    self.collec_pages.append(nouvelle)
    self.pages_frais.select(self.num_pages - 1)
class TestPageMethod(unittest.TestCase):
    # Tests for the accessors of a freshly created Page.

    def setUp(self):
        # Every test starts from a page created from the string 'Sample'.
        self.testPage = Page('Sample')

    def test_getUrl(self):
        # getUrl() must hand back the value the page was created with.
        expected_url = 'Sample'
        self.assertEqual(self.testPage.getUrl(), expected_url)

    def test_getChildren(self):
        # A new page must report an empty list of children.
        no_children = []
        self.assertEqual(self.testPage.getChildren(), no_children)
def test_is_url_an_html_page__html__html_page(self):
    # Arrange: a page built from the cnn.com URL
    page_under_test = Page("http://cnn.com")

    # Act
    is_html = page_under_test.is_url_an_html_page()

    # Assert
    self.assertEqual(is_html, True, "Expected HTML file")
def test_is_url_an_html_page__image__not_html_page(self):
    # Arrange: a page built from the URL of a .jpg file
    image_url = "https://farm2.static.flickr.com/1193/5133054365_0170d20672.jpg"
    page_under_test = Page(image_url)

    # Act
    is_html = page_under_test.is_url_an_html_page()

    # Assert
    self.assertEqual(is_html, False, "Expected image file")
def dummy_questionaire():
    """Return a ``Page_List`` holding three pages of dummy questions.

    The pages wrap, in order, the results of ``dummy_questions()``,
    ``dummy_questions2()`` and ``dummy_questions3()``.
    """
    first_questions = dummy_questions()
    pages = Page_List.Page_List()
    pages.add_obj(Page.Page(first_questions))
    for build_questions in (dummy_questions2, dummy_questions3):
        pages.add_obj(Page.Page(build_questions()))
    return pages
def step_impl(context, url):
    """Check that the browser's current URL contains *url*.

    Variables in *url* are expanded through ``world.replace_variables``
    and the page is given time to load before the comparison. Returns
    ``True`` on a match; otherwise the mismatch is reported through
    ``log.failed`` and ``None`` is returned.
    """
    url = world.replace_variables(url)

    page = Page(url=url)
    page.driver = world.driver
    page.wait_for_loading()

    if url not in world.driver.current_url:
        log.failed("Verify url contains?", world.driver.current_url, url)
        return None
    return True
def create_page(self, name, metainfo):
    """Register *name* and return its page, creating it when needed.

    *name* is always added to ``self.list_of_pages``. When ``self.db_path``
    exists the page is obtained through ``self.get_page(name)``; otherwise
    a new ``Page(name, metainfo)`` is built and saved.
    """
    path = self.db_path
    print(name, path)
    self.list_of_pages.add(name)

    if os.path.exists(path):
        return self.get_page(name)

    fresh = Page(name, metainfo)
    fresh.save()
    return fresh
def __call__(self):
    """Render a page made of an ``IndexBox`` and a ``HelpBox``.

    Both boxes are built for the last path segment of the request URL.
    """
    # Content: the last segment of the URL names the file to show.
    filename = CTK.request.url.split('/')[-1]
    boxes = [IndexBox(filename), HelpBox(filename)]

    # Build the page
    page = Page()
    for box in boxes:
        page += box
    return page.Render()
def create_page(cls, doc):
    """Build a ``Page`` from a parsed page-definition document.

    Args:
        cls: Receiver of the call; not used by the body.
        doc: Mapping with a required ``'page'`` entry (``'name'`` plus
            optional ``'url'`` and ``'url_paths'``) and an optional
            ``'elements'`` list of mappings with ``'name'`` and ``'xpath'``.

    Returns:
        The populated ``Page``.

    Duplicate url-path names and duplicate element names keep the first
    definition and log a warning. An element without a name or without an
    xpath is reported and skipped; previously the warning was followed by
    a ``KeyError`` on the missing key, which aborted the whole load.
    """
    page_doc = doc['page']

    page = Page()
    page.name = page_doc['name']
    if "url" in page_doc:
        page.url = page_doc['url']
    # if "route" in doc['page']:
    #     page.route = doc['page']['route']

    if "url_paths" in page_doc:
        for item in page_doc['url_paths']:
            name = item['name']
            path = item['path']
            if name not in page.url_paths:
                page.url_paths[name] = path
            else:
                log.warning("Duplicate url path name!",
                            {'page': page.name, 'path': name})

    if "elements" in doc:
        for item in doc['elements']:
            if "name" not in item:
                log.warning("Found unnamed element!",
                            {'page': page_doc['name']})
                continue  # nothing to register it under
            if "xpath" not in item:
                log.warning("XPath not specified!",
                            {'page': page_doc['name'],
                             'element': item['name']})
                continue  # an element without a locator is unusable

            element = Element(item['name'], item['xpath'])
            element.driver = page.driver
            if element.name not in page.elements:
                page.elements[element.name] = element
            else:
                log.warning("Duplicate element name!",
                            {'page': page.name, 'element': element.name})

    # if "site_url" in doc:
    #     page.site_config = doc['site_url']
    #     page.site_config['default'] = page.url
    return page
def main():
    """Fetch pages in a loop, driven by the map returned by get_args_read().

    The map must contain ``'host'`` and ``'path'``; when either is missing
    an error is printed and the process exits with status 1. Optional keys
    are ``'max_parallel_pages'`` (default 1) and ``'page_limit'`` (default
    0, in which case no limit is attached to the main request).

    The outer ``while True`` has no exit of its own in this function.

    NOTE(review): the nesting of the loop body below was reconstructed
    from a whitespace-mangled source -- confirm the position of ``break``
    and of the statements following the ``if`` lines against the original.
    """
    configure = get_args_read()
    # 'host' and 'path' are mandatory.
    if not 'host' in configure:
        printError('There should be "host" field in your map')
        sys.exit(1)
    if not 'path' in configure:
        printError('There should be "path" field in your map')
        sys.exit(1)
    # Optional settings with their defaults.
    if 'max_parallel_pages' in configure:
        MAX_PARALLEL_PAGES = configure['max_parallel_pages']
    else:
        MAX_PARALLEL_PAGES = 1
    if 'page_limit' in configure:
        page_limit_count = configure['page_limit']
    else:
        page_limit_count = 0
    # Cookies are read from ./cookies.txt, relative to the working directory.
    cj = cookielib.MozillaCookieJar()
    cj.load('./cookies.txt')
    HttpFetchProcess.start()
    main_dict = configure
    main_request = PageDelegate.HttpRequest(configure['host'], configure['path'], jar = cj)
    main_page_request = Page.PageRequest(main_request, main_dict)
    # Queue filled by Page.do_page() with follow-up requests.
    new_page_requests = []
    sleeper = SleepForClass()
    while True:
        for i in range(MAX_PARALLEL_PAGES):
            main_page_delegate = PageDelegate.PageDelegate()
            if not new_page_requests:
                # Queue is empty: (re)start from the main page, but only in
                # the first slot of this round.
                if i == 0:
                    page_request = main_page_request
                    if page_limit_count != 0:
                        page_request.set_limit(Page.PageLimit(page_limit_count))
                    sleeper.sleep_if_not_empty()
                    Page.do_page(page_request, main_page_delegate, new_page_requests)
                    sleeper.inc_count()
                break
            else:
                # Serve the oldest queued request.
                page_request = new_page_requests.pop(0)
                page_limit = page_request.get_limit()
                if page_limit:
                    if page_limit.is_out():
                        # Limit used up: drop everything still queued.
                        new_page_requests = []
                        break
                    page_limit.dec()
                Page.do_page(page_request, main_page_delegate, new_page_requests)
                sleeper.inc_count()
        # Drain the fetch process before starting the next round.
        while HttpFetchProcess.next():
            pass
        printDebug('<!------------------------------ count = ' + str(sleeper.count_))
        sleeper.check()
def initialize_page(**kwargs):
    """
    Build a ``Page`` from *kwargs*, filling in test defaults, and run
    ``full_clean()`` on it before returning it.

    Defaults: language ``"en"``, title ``"test"``, a random UUID as url
    and ``CONTENT`` as content. The pseudo-argument ``eternal`` is removed
    from *kwargs*; when true, the availability window defaults to the
    years 1900-2900.

    :rtype: E-Commerce.simple_cms.models.Page
    """
    defaults = [
        ("_current_language", "en"),
        ("title", "test"),
        ("url", str(uuid.uuid4())),
        ("content", CONTENT),
    ]
    if kwargs.pop("eternal", False):
        defaults.append(("available_from", datetime.datetime(1900, 1, 1)))
        defaults.append(("available_to", datetime.datetime(2900, 1, 1)))

    # Explicit arguments always win over the defaults.
    for field, value in defaults:
        kwargs.setdefault(field, value)

    page = Page(**kwargs)
    page.full_clean()
    return page
def __call__ (self):
    """Serve the downloads area: a directory index or the requested file.

    The part of the request URL after ``URL_BASE/`` is resolved against
    ``DOWNLOADS_LOCAL``. Without such a part the root index is rendered.
    A path resolving outside ``DOWNLOADS_LOCAL`` renders a refusal page, a
    missing path yields a 404 response, a directory renders its index and
    anything else is handed to ``CTK.HTTP_XSendfile``.
    """
    title = _("Cherokee Project Downloads")

    # Page
    page = Page.Page_Menu (title=title)

    # Dispatcher
    tmp = re.findall (r'%s/(.*)$'%(URL_BASE), CTK.request.url)
    if not tmp:
        page += Index('/')
        return page.Render()

    # Check path: realpath() collapses '..' and symlinks, so the result
    # can be compared against the downloads root.
    path = tmp[0]
    fp = os.path.realpath (os.path.join (DOWNLOADS_LOCAL, path))

    # A bare startswith(DOWNLOADS_LOCAL) would also accept sibling paths
    # that merely share the prefix (e.g. '<root>-private/...'), so require
    # either the root itself or the root followed by a path separator.
    base = DOWNLOADS_LOCAL.rstrip (os.sep)
    if fp != base and not fp.startswith (base + os.sep):
        page += CTK.RawHTML ("Nice try")
        return page.Render()

    if not os.path.exists (fp):
        return CTK.HTTP_Response (404)

    # It is a directory
    if os.path.isdir (fp):
        page += Index (fp[len(DOWNLOADS_LOCAL):])
        return page.Render()

    return CTK.HTTP_XSendfile (fp)
def default(message=None):
    """Render the collection administration page.

    Users holding neither the uploader nor the admin role are redirected:
    to ``/auth`` when not logged in, to ``/`` otherwise. The page lists
    the collections that ``ACL.filter_collections("co", ...)`` lets
    through, a link for adding a collection and, when given, *message*.
    """
    # Authentication
    if not Role.user_has_roles([Role.ROLE_UPLOADER, Role.ROLE_ADMIN]):
        target = '/' if Auth.Auth().is_logged_in() else '/auth'
        return CTK.HTTP_Redir(target)

    all_ids = Collection.get_collections_dict().keys()
    acl = ACL()
    collections = acl.filter_collections("co", all_ids)

    # Render
    page = Page.Default()
    if Role.user_has_role(Role.ROLE_ADMIN):
        heading = "<h1>%s: Colecciones</h1>" % (ADMIN_LINK)
    else:
        heading = "<h1>Administración de Colecciones</h1>"
    page += CTK.RawHTML(heading)

    if len(collections) != 0:
        cols = [(Collection.Collection, ident) for ident in collections]
        page += Paginate(cols, WidgetCollection.DefaultWidget)

    # Wrap the link template in a paragraph first, then fill it in.
    paragraph = "<p>%s</p>" % LINK_HREF
    page += CTK.RawHTML(paragraph % ('%s/new' % LOCATION, 'Añadir colección'))

    if message:
        page += Message(message)
    return page.Render()
def edit_collection():
    """Render the edit form of the collection named by the request URL.

    The collection id is the last path segment of the URL (after
    ``clear_params``). Users holding neither the uploader nor the admin
    role are redirected to ``/``. An id that is not an integer, or that
    ``ACL.filter_collections("ed", ...)`` does not report as editable, is
    answered with ``CTK.HTTP_Error(401)``.
    """
    # Authentication
    ok = Role.user_has_roles([Role.ROLE_UPLOADER, Role.ROLE_ADMIN])
    if not ok:
        return CTK.HTTP_Redir('/')

    # Table
    url = clear_params(CTK.request.url)
    collection_id = url.split('/')[-1]

    # The id comes straight from the URL. Validate it before use: a
    # non-numeric value used to raise an unhandled ValueError further
    # down. Such an id cannot be editable, so it is refused the same way.
    try:
        numeric_id = int(collection_id)
    except ValueError:
        return CTK.HTTP_Error(401)

    acl = ACL()
    editable = acl.filter_collections("ed", [collection_id])
    if numeric_id not in editable:
        return CTK.HTTP_Error(401)

    # Only the validated integer is interpolated into the statement,
    # never the raw URL segment.
    q = "SELECT collections_id, collections.name,"\
        "GROUP_CONCAT(assets.id) AS parts "\
        "FROM assets JOIN collections ON collections.id=collections_id "\
        "WHERE collections_id='%(collection_id)s';" % {'collection_id': numeric_id}

    table = PropsAutoSQL('%s/edit/apply' % LOCATION, q)
    table.AddConstant('collection_id', str(collection_id))
    table.Add('Nombre', CTK.TextField(), 'name', 'Nombre de la colección')

    page = Page.Default()
    page += CTK.RawHTML("<h1>%s: Editar colección</h1>" % (MENU_LINK))
    page += table
    page += ClaimedWidget(c_id=collection_id)
    page += UnclaimedWidget(c_id=collection_id)
    return page.Render()
def default():
    """Render the administration landing page.

    Only reachable with the admin role; otherwise whatever
    ``Auth.assert_is_role`` returned is passed back to the caller. The
    page body is the table of shortcuts below, laid out by ``wrap()``.
    """
    # Authentication
    denied = Auth.assert_is_role(Role.ROLE_ADMIN)
    if denied:
        return denied

    # One tuple per shortcut: (url, icon file, title, description)
    shortcuts = [
        ('/report', 'icon_report.png', 'Reportes',
         'Estadísticas del sistema'),
        ('/admin/user', 'icon_user.png', 'Usuarios',
         'Gestionar cuentas de usuario'),
        ('/admin/profile', 'icon_profile.png', 'Perfiles',
         'Configurar perfiles de negocio'),
        ('/admin/type', 'icon_type.png', 'Tipos',
         'Gestionar tipos de activo'),
        ('/admin/format', 'icon_format.png', 'Formatos',
         'Gestionar formatos de los activos'),
        ('/admin/license', 'icon_license.png', 'Licencias',
         'Configuración de licencias'),
        ('/asset', 'icon_asset.png', 'Activos',
         'Gestionar activos del sistema'),
        ('/collection', 'icon_collection.png', 'Colecciones',
         'Gestionar colecciones del sistema'),
        ('/admin/acl', 'icon_permission.png', 'ACL',
         'Gestionar permisos de activos y colecciones'),
    ]

    page = Page.Default(body_id="admin_page")
    page += CTK.RawHTML('<h1>Administración Activae</h1>')
    page += wrap(shortcuts)
    return page.Render()
def page_forbidden(limiter):
    """Render the "limits exceeded" page for the given limiter.

    Requires the uploader role; otherwise whatever
    ``Auth.assert_is_role`` returned is passed back. Every limit stored as
    0 is replaced, in ``limiter.limits`` itself, by ``NO_LIMIT`` before
    the usage/limit table is built.
    """
    # Authentication
    denied = Auth.assert_is_role(Role.ROLE_UPLOADER)
    if denied:
        return denied

    unlimited = [key for key, value in limiter.limits.items() if value == 0]
    for key in unlimited:
        limiter.limits[key] = NO_LIMIT

    # (row label, key into limiter.usage / limiter.limits)
    labelled = [('Número de archivos', 'files'),
                ('Total del sistema', 'total'),
                ('Tamaño máximo de archivo', 'size')]
    rows = [(label, limiter.usage[key], limiter.limits[key])
            for label, key in labelled]
    header = ['Restricción', 'Uso', 'Límite']

    table = CTK.Table()
    table[(1, 1)] = [CTK.RawHTML(text) for text in header]
    table.set_header(row=True, num=1)
    # Data rows start at row 2, right below the header.
    for offset, row in enumerate(rows):
        table[(offset + 2, 1)] = [CTK.RawHTML(str(cell)) for cell in row]

    page = Page.Default()
    page += CTK.RawHTML("<h1>%s: Subir ficheros</h1>" % (MENU_LINK))
    page += CTK.RawHTML("<h2>Se han excedido los límites</h2>")
    page += table
    return page.Render()
def new_profile():
    """Render the form for creating a profile.

    Requires the admin role; otherwise whatever ``Auth.assert_is_role``
    returned is passed back. The form has two required text fields,
    ``name`` and ``description``, and submits to
    ``/admin/profile/new/apply``.
    """
    # Authentication
    denied = Auth.assert_is_role(Role.ROLE_ADMIN)
    if denied:
        return denied

    table = CTK.PropsTable()
    name_field = CTK.TextField({'name': 'name', 'class': "required"})
    table.Add('Nuevo Profile', name_field, "Nombre del nuevo profile")
    description_field = CTK.TextField({'name': 'description',
                                       'class': "required"})
    table.Add('Descripcion', description_field,
              "Descripcion del profile que se esta dando de alta")

    form = CTK.Submitter("/admin/profile/new/apply")
    form += table

    page = Page.Default()
    page += CTK.RawHTML("<h1>%s: Añadir Profile</h1>" % (PROFILES_PREFIX))
    page += form
    return page.Render()
def __call__ (self):
    """Render the screencasts page for the video named in the request URL.

    The last path segment of the URL is compared, lower-cased, with the
    first field of every ``VIDEOS`` entry. Without a match the first entry
    of ``VIDEOS`` is shown. The response is wrapped in
    ``CTK.HTTP_Cacheable (60, ...)``.
    """
    title = "Screencasts"

    page = Page.Page_Menu_Side (title=title)
    page += CTK.RawHTML ("<h1>%s</h1>"%(title))

    # Sidebar
    page.sidebar += Menu()

    # Redirect the hashtag
    page += CTK.RawHTML (js=JS)

    # Content: pick the requested video, falling back to the first one
    video = None
    requested = re.findall (r'^%s.*/(.*)$'%(URL_BASE), CTK.request.url, re.I)
    if requested:
        wanted = requested[0].lower()
        matching = [entry for entry in VIDEOS if entry[0] == wanted]
        if matching:
            video = matching[0]
    if not video:
        video = VIDEOS[0]

    box = CTK.Box ()
    box += CTK.RawHTML ('<h2>%s</h2>' %(video[2]))
    box += CTK.RawHTML (EMBED_VIDEO_HTML%({'num': video[1]}))
    box += CTK.RawHTML ('</br><p><b>%s</b></p>' %(DEPRECATED))
    page += box

    return CTK.HTTP_Cacheable (60, body=page.Render())
def post_signup():
    """Handle a submitted signup form.

    The form is validated by ``Check.check_signup_form``. When its
    ``'valid'`` entry equals ``True`` the client is redirected to the
    ``get_welcome`` endpoint for the submitted username; otherwise the
    signup page is rendered again from the validation result.
    """
    outcome = Check.check_signup_form(request.form)
    if not (outcome.get('valid') == True):
        return Page.render_signup(outcome)
    welcome_url = url_for('get_welcome', username=outcome.get('username'))
    return redirect(welcome_url)
def add_page_UI(page_list, page=None):
    """Add *page* to *page_list* and return the result of its UI set-up.

    A new ``Page.Page()`` is created when no page is given. The return
    value is whatever ``initalize_page_UI(page_list, page)`` returns.
    """
    page = Page.Page() if page is None else page
    page_list.add_obj(page)
    return initalize_page_UI(page_list, page)
def check_signup_form(form):
    """Validate a signup form and return the arguments for the signup page.

    Args:
        form: Mapping with the keys ``'username'``, ``'password'``,
            ``'verify'`` and ``'email'``.

    Returns:
        The dict from ``Page.get_default_signup_args()`` with the
        submitted username and email filled in, one ``*_error`` entry per
        failed check and, only when every check passed, ``'valid'`` set to
        ``True``.

    The submitted values are no longer echoed to stdout: that debug print
    wrote both plaintext passwords to the server output.
    """
    us_username = form['username']
    us_pd1 = form['password']
    us_pd2 = form['verify']
    us_email = form['email']

    checked_form = Page.get_default_signup_args()
    checked_form['username'] = us_username
    checked_form['email'] = us_email

    valid = True
    if not valid_username(us_username):
        checked_form['username_error'] = 'Invalid username'
        valid = False
    if not valid_email(us_email):
        checked_form['email_error'] = 'Invalid email'
        valid = False
    # A mismatch takes precedence: the password itself is only checked
    # once both entries agree.
    if not valid_verify(us_pd1, us_pd2):
        checked_form['verify_error'] = 'Password not matched'
        valid = False
    elif not valid_password(us_pd1):
        checked_form['password_error'] = 'Invalid password'
        valid = False

    if valid:
        checked_form['valid'] = valid
    return checked_form
def __call__(self):
    """Render the upload page for a new version of an existing asset.

    Requires the uploader role; otherwise whatever
    ``Auth.assert_is_role`` returned is passed back. When the ``Limiter``
    does not allow the operation, the limits page is returned instead. The
    parent id is read from the URL segment that follows
    ``<LOCATION>/evolve/parent=``.
    """
    # Authentication
    denied = Auth.assert_is_role(Role.ROLE_UPLOADER)
    if denied:
        return denied

    operation = Limiter()
    if not operation.is_allowed():
        return page_forbidden(operation)

    prefix = '%s/evolve/parent=' % LOCATION
    parent_id = CTK.request.url[len(prefix):].split('/')[0]
    self.params = {'parent_id': parent_id}

    no_file_url = '%s/evolve/parent=%s' % (PAGEASSET_LOCATION, parent_id)
    link = LINK_HREF % (no_file_url, 'Crear sin fichero adjunto')

    uploader_props = {
        'handler': report_upload_evolve,
        'target_dir': self.target_dir
    }
    self.page = Page.Default()
    self.page += CTK.RawHTML("<h1>%s: Subir ficheros</h1>" % (MENU_LINK))
    self.page += CTK.Uploader(uploader_props, self.params, direct=False)
    self.page += CTK.RawHTML(link)
    return self.page.Render()
def loadScript(self,lessonScriptPath):
    """Load a lesson script file into a ``Lesson`` and return it.

    The path is completed by ``completeLessonScriptPath``. Module and
    lesson names are derived from it, and every line that is not a lone
    newline becomes one ``Page`` of the lesson, keyed by the name
    extracted from that line.

    When the script file does not exist an empty one is created and the
    lesson is returned without pages. Any other error now propagates:
    the former bare ``except`` reacted to every failure (a bad line, a
    decoding error, ...) by truncating the existing script file.
    """
    lessonScriptPath = self.completeLessonScriptPath(lessonScriptPath)

    courseName = 'STANDARD_COURSE' ###- TODO, implement get course name
    course = Course.Course(courseName,self.application)

    moduleName = self.getModuleNameFromLessonScriptPath(lessonScriptPath)
    lessons = {}
    module = Module.Module(moduleName,lessons,course)

    lessonName = self.getLessonNameFromLessonScriptPath(lessonScriptPath)
    pages = {}
    module.lessons[lessonName] = Lesson.Lesson(lessonName,pages,module)

    try :
        with open(lessonScriptPath,"r",encoding="utf-8") as scriptFile :
            for lessonScriptLine in scriptFile :
                if lessonScriptLine != '\n' :
                    pageName = self.getPageNameFromLessonScriptLine(lessonScriptLine)
                    pageScript = lessonScriptLine.strip()
                    module.lessons[lessonName].pages[pageName] = Page.Page(pageName,pageScript,module.lessons[lessonName])
    except FileNotFoundError :
        # First use of this lesson: start it off with an empty script.
        with open(lessonScriptPath,"w",encoding="utf-8") :
            pass

    return module.lessons[lessonName]
def __call__(self):
    """Render the 'Information Sources' page: a heading and a ``Panel``."""
    heading = _('Information Sources')

    page = Page.Base(heading, helps=HELPS)
    page += CTK.RawHTML("<h1>%s</h1>" % (heading))
    page += Panel()
    return page.Render()
def readLines(file):
    """Build a ``Page`` from every line of the file at path *file*.

    Lines are passed to ``Page`` unchanged, trailing newline included. A
    line for which ``Page`` fails is skipped. The file is closed once all
    lines were read (it used to be left open), and only ``Exception`` is
    swallowed, so ``KeyboardInterrupt`` and ``SystemExit`` still get
    through.

    Returns:
        None.
    """
    with open(file) as handle:
        content = handle.readlines()

    for line in content:
        try:
            Page(line)
        except Exception:
            # Best effort: one bad line must not stop the others.
            pass
def __call__(self):
    """Render the 'configuration file not found' page.

    The page shows a warning notice followed by three forms, one per
    configuration template (regular, static content, server development).
    """
    container = CTK.Container()
    container += CTK.RawHTML("<h2>%s</h2>" %
                             (_('Create a new configuration file:')))

    # NOTE: the local names `key`, `name`, `label` and `conf_file` are kept
    # on purpose: the warning text below is filled in from locals().
    templates = (
        ('regular', _('Regular'),
         _('Regular configuration: Apache logs, MIME types, icons, etc.')),
        ('static', _('Static Content'),
         _('Optimized to send static content.')),
        ('development', _('Server Development'),
         _('No standard port, No log files, No PID file, etc.')),
    )
    for key, name, label in templates:
        container += Form(key, name, label)

    conf_file = CTK.cfg.file
    notice = CTK.Notice('warning')
    notice += CTK.RawHTML("<b>%s</b><br/>" %
                          (_(WARNING_NOT_FOUND_1) % (locals())))
    notice += CTK.RawHTML(WARNING_NOT_FOUND_2)

    page = Page.Base(_('New Configuration File'),
                     body_id='new-config',
                     helps=HELPS)
    page += CTK.RawHTML("<h1>%s</h1>" % (_('Configuration File Not Found')))
    page += notice
    page += CTK.Indenter(container)
    return page.Render()
def newPage(self, date):
    """Create an empty page for *date* unless one already exists.

    Returns ``True`` when a page was appended to ``self.pages`` and
    ``False`` when ``self.isPage(date)`` reported an existing one.
    """
    if self.isPage(date):
        return False
    self.pages.append(Page(date, []))
    return True
def __call__(self):
    """Render the status page: a filterable panel list and a content box."""
    title = _('Status')

    # Content: the left panel drives what is shown in the right box.
    left = CTK.Box({'class': 'panel'})
    left += CTK.RawHTML('<h2>%s</h2>' % (title))
    right = CTK.Box({'class': 'status_content'})

    filter_field = CTK.TextField({
        'class': 'filter',
        'optional_string': _('Virtual Server Filtering'),
        'optional': True
    })
    left += CTK.Box({'class': 'filterbox'}, filter_field)
    left += CTK.Box({'id': 'status_panel'}, self.PanelList(right))

    # Build the page
    page = Page.Base(title,
                     body_id='status',
                     helps=HELPS,
                     headers=Submit_HEADER)
    page += left
    page += right
    return page.Render()
def __call__ (self):
    """Render the admin home page.

    ``Cherokee.pid`` is refreshed first. The page consists of a top box
    (heading and language selector) and a container holding the main area
    and the sidebar, followed by the ``JS_SCROLL`` script.
    """
    Cherokee.pid.refresh()

    # Top
    top = CTK.Box({'id': 'top-box'})
    top += CTK.RawHTML ("<h1>%s</h1>"% _('Welcome to Cherokee Admin'))
    top += LanguageSelector()

    # Content: Left
    mainarea = CTK.Box({'id': 'main-area'})
    for widget_class in (ServerInfo, CPUInfo, MemoryInfo, CommunityBar):
        mainarea += widget_class()

    # Content: Right
    sidebar = CTK.Box({'id': 'sidebar'})
    for widget_class in (SupportBox, HaltAdmin):
        sidebar += widget_class()

    # Content
    cont = CTK.Box({'id': 'home-container'})
    cont += mainarea
    cont += sidebar

    # Page
    page = Page.Base(_('Welcome to Cherokee Admin'),
                     body_id='index', helps=HELPS)
    page += top
    page += cont
    page += CTK.RawHTML (js=JS_SCROLL)
    return page.Render()
def __init__(self, url_arg, max_iter_arg, max_time_arg):
    """Store the start URL and the two limits, and create the home page.

    ``graph`` starts as ``None`` and ``page_array`` as an empty list. The
    home page is ``Page.Page(0, url_arg)``.
    """
    # Values supplied by the caller
    self.max_iter, self.max_time = max_iter_arg, max_time_arg
    self.start_url = url_arg

    # State that starts out empty
    self.graph = None
    self.page_array = []

    self.home_page = Page.Page(0, url_arg)
def default_admin(message=None, assets=None):
    """Render the permissions administration page.

    Requires the admin role; otherwise whatever ``Auth.assert_is_role``
    returned is passed back. The page shows, in order: the heading, the
    looked-up *assets* when given, a tab set (asset list and search form)
    and *message* when given.
    """
    denied = Auth.assert_is_role(Role.ROLE_ADMIN)
    if denied:
        return denied

    # Render
    page = Page.Default()
    page += CTK.RawHTML("<h1>%s: Permisos</h1>" % ADMIN_LINK)

    contents = get_admin_contents()
    main = CTK.Container()
    if len(contents) == 0:
        main += CTK.RawHTML("<h2>No hay activos.</h2>")
    else:
        main += Paginate(contents, DefaultWidget)

    if assets != None:
        page += CTK.RawHTML("<h2>Activos buscados</h2>")
        page += acl_get_custom(assets)

    tabs = CTK.Tab()
    tabs.Add('Activos', main)
    tabs.Add('Búsqueda', WidgetLookup.get_fields_form(ACL_ADMIN_LOOKUP))
    page += tabs

    if message:
        page += Message(message)
    return page.Render()
def __call__ (self):
    """Render the quickstart guide.

    The first step holds an OS selector and a druid loaded from
    ``URL_BASE/<selected os>``. Changing the selector points the druid at
    the URL of the newly chosen value. The sidebar carries the
    development-version box and the mirror list.
    """
    title = _("Quickstart Guide")

    # Install Cherokee
    os_combo = OS_Panel()
    druid_url = '%s/%s'%(URL_BASE, os_combo.os_selected)
    druid = CTK.Druid (CTK.RefreshableURL (druid_url))

    platform_box = CTK.Box ({'id': 'platform-box'})
    platform_box += os_combo

    step1 = CTK.Box({'id': 'qs-step-1', 'class': 'qs-step'})
    step1 += platform_box
    step1 += CTK.RawHTML ("<h1>%s</h1>" %(title))
    step1 += CTK.RawHTML ('<h2>%s</h2>' %(_("Install Cherokee")))
    step1 += druid

    goto_js = '"%s/"+$("#%s").val()' %(URL_BASE, os_combo.combo.id)
    os_combo.bind ('change', druid.JS_to_goto(goto_js))

    # Page
    page = Page.Page_Menu_Side (title=title)
    page += step1

    # Development version
    page.sidebar += Development_Version()

    # Mirrors
    mirrors = Mirror_Sites.Mirrors()
    page.sidebar += mirrors

    # This page cannot be cached. It'd break the OS detection.
    return page.Render()
def __init__(self, controller):
    """Create the five pages of the GUI and place the first one.

    The pages are kept in ``self.listPage``. Only page 0 is put on the
    grid here (row 0, column 0, sticky on all sides).
    """
    super(Gui, self).__init__()
    #self.geometry("800x600")

    # The list is attached to the instance before the pages are built.
    pages = self.listPage = []
    for index in range(5):
        pages.append(Page(self, index, controller))

    pages[0].grid(row=0, column=0, sticky="nsew")
def save(self, pcb, program):
    """Write *program* into memory page by page and register the pages.

    Each instruction is written at the next free position of the current
    frame (``marco``). When the current page is full it is closed and a
    new page on a fresh free frame is started. The list of pages is
    finally stored in ``self.blockTable`` under the pid of *pcb*.

    Fixes over the previous version:

    * ``get_size`` was compared without being called, so a bound method,
      not the page size, was compared with the counter (a ``TypeError``
      on Python 3).
    * The instruction that triggered a page switch was dropped; it is now
      written to the new page.
    """
    pages = []
    count = 0
    marco_current = self.get_marco_free()
    page_current = Page(marco_current)

    for instruction in program.getInstrucciones():
        # NOTE(review): as before, a page accepts instructions while
        # count <= size, i.e. size + 1 of them -- confirm whether the
        # intended capacity is `size`.
        if count > page_current.get_size():
            # Current page is full: close it and continue on a new frame.
            pages.append(page_current)
            marco_current = self.get_marco_free()
            page_current = Page(marco_current)
            count = 0

        page_current.add_number_instruction(pcb.get_pc())
        self.memory.write(marco_current.next_free_pos(), instruction)
        count += 1

    pages.append(page_current)
    self.blockTable.put(pcb.get_pid(), pages)
def step_impl(context, url):
    """Open *url* in the shared browser and wait until it has loaded.

    Variables in *url* are expanded through ``world.replace_variables``
    before the page is visited.
    """
    target = world.replace_variables(url)

    page = Page()
    page.url = target
    page.driver = world.driver

    page.go()
    page.wait_for_loading()
def GET(self):
    """Render the management view with the current user's blogs.

    Visitors without a user cookie and users whose ``admin`` field is 0
    are redirected to ``/``.
    """
    user = utils.getuser_by_cookie()
    if user is None or user['admin'] == 0:
        raise web.seeother('/')

    blogs, page = Page.get_blogs_by_page(user['name'])
    return render.manage(blogs=blogs, page=page, user=user)
def GET(self):
    """Return the current user's blogs, rendered to HTML, with paging data.

    Visitors without a user cookie and users whose ``admin`` field is 0
    are redirected to ``/``. The ``content`` of every blog is converted
    in place with ``markdown2.markdown``.
    """
    user = utils.getuser_by_cookie()
    if user is None or user['admin'] == 0:
        raise web.seeother('/')

    blogs, page = Page.get_blogs_by_page(user['name'])
    for entry in blogs:
        entry['content'] = markdown2.markdown(entry['content'])

    # Custom dictionary handed back to the caller
    return {'blogs': blogs, 'page': page}
def load(self):
    """Fetch ``self.url``, build the listing items and load their details.

    Every ``<li>`` whose class matches ``.*listingCard.*`` becomes one
    item via ``self.process_item``. Details are then loaded item by item
    until roughly two seconds have passed since the start of the call;
    the items left over keep whatever ``process_item`` produced.

    Returns:
        ``self.items``.

    The items are built as a list: with ``map()`` on Python 3 the loop
    below consumed the iterator and callers received an exhausted one.

    NOTE(review): when the fetch yields nothing, ``self.items`` is not
    assigned here -- confirm that it is initialised elsewhere.
    """
    self.start_time = datetime.now()
    self.max_time = timedelta(seconds=2)
    self.page = Page.fetch(self.url, 5)

    if self.page:
        soup = BeautifulSoup(self.page)
        cards = soup.findAll('li',
                             {"class": re.compile('.*listingCard.*')})
        self.items = [self.process_item(card) for card in cards]

        deadline = self.start_time + self.max_time
        for item in self.items:
            item.load_details()
            if datetime.now() > deadline:
                # Stop due to time constraint
                break

    return self.items
def main():
    """Build a demo page out of nested panels, render it and print it.

    Structure: ``panel1`` contains ``panel2`` (loaded from ``text.txt``);
    ``panel3`` contains ``panel4``, which contains ``panel5`` (loaded from
    a literal string).
    """
    P = Page("my_page", "A new page is born")

    # First branch: panel1 > panel2 (file content)
    outer_a = Panel("panel1")
    inner_a = Panel("panel2")
    inner_a.load_from_file('text.txt')
    outer_a.add_panel(inner_a)
    P.add_panel(outer_a)

    # Second branch: panel3 > panel4 > panel5 (literal text)
    outer_b = Panel("panel3")
    middle_b = Panel("panel4")
    inner_b = Panel("panel5")
    inner_b.load("There are new things happening.")
    middle_b.add_panel(inner_b)
    outer_b.add_panel(middle_b)
    P.add_panel(outer_b)

    # render everything
    P.render()

    # display
    print(P)
def load_details(self):
    """Fetch ``self.link`` and pass the result to ``process_details``."""
    details_page = Page.fetch(self.link, 0, False)
    self.process_details(details_page)
def setUp(self):
    # Fixture shared by the tests: a page created from the string 'Sample'.
    sample_page = Page('Sample')
    self.testPage = sample_page
def get_page(self, name):
    """Return the ``Page`` called *name* after calling ``load()`` on it."""
    loaded = Page(name)
    loaded.load()
    return loaded
def analysis(links_list, top_mots, crawling, width, coherence, recursions):
    """Analyse a start link and the links found from it, level by level.

    The function calls itself after each level until the requested number
    of levels has been analysed; the current depth can be read from
    ``len(links_list)``.

    * ``links_list``: list of lists, one per level. ``[0][0]`` is the link
      entered by the user. Filled as the analysis proceeds and used as the
      stop test. It is modified in place.
    * ``top_mots``: number of words to select in the first page analysed.
      Only used during the first level.
    * ``crawling``: list of lists of ``URLWords`` objects, one list per
      level (``crawling[0][0]`` is the start page). The value passed by
      the initial caller is ignored: the first level starts a new list.
    * ``width``: horizontal limit, handed to ``Page`` to bound the number
      of links collected per page.
    * ``coherence``: threshold handed to ``Page.find_same_words`` for a
      word of the start page to be kept in a linked page's results.
    * ``recursions``: number of levels (start level included) to analyse.

    Returns the ``crawling`` list.

    Fixes over the previous version:

    * Links were tested against ``links_list`` itself, a list of lists,
      so a link already collected in an earlier level was never detected.
      They are now tested against every link of every level.
    * The results of a followed level were looked up with
      ``crawling[len(links_list) - 1]`` after both lists had grown, which
      is always out of range; the silent ``except`` hid that and nothing
      was saved. The level just built is now saved directly.
    * Database failures are still non-fatal, but they are logged.
    """
    import logging
    logger = logging.getLogger(__name__)

    if len(links_list) == 1:
        # First level: normalise and analyse the link given by the user.
        lienparse = urlparse(links_list[0][0])
        if lienparse.scheme == '':
            # TODO: further checks could go here, e.g. that the value is
            # a valid domain name.
            links_list[0][0] = 'http://' + links_list[0][0]
        # TODO: check that the page answers with status 200, otherwise
        # ask the user to verify the URL.
        Pag = Page(links_list[0][0], width)
        Pag.wordcount()  # words of the page and their occurrences
        res_lev = URLWords(Pag)
        res_lev.results = Pag.results_level1(top_mots)  # most frequent words

        level = [res_lev]   # URLWords results of this level
        crawling = [level]  # all results end up in this list of levels

        # There are results to store from here on. Saving is best effort
        # so that a database problem cannot stop the analysis.
        database = db()
        try:
            res_lev.save1(database)
        except Exception:
            logger.exception("could not save the start page results")

        links_list.append(Pag.links)
        analysis(links_list, top_mots, crawling, width, coherence, recursions)
    else:
        # 1-based number of the level analysed in this call.
        # NOTE(review): this is the value savefollow() receives; it used
        # to be read after links_list had grown by one -- confirm which
        # numbering savefollow() expects.
        depth = len(links_list)
        last_level = depth == recursions

        level = []        # URLWords results of this level
        links_level = []  # links collected for the next level
        # Everything collected so far, the start link included.
        seen = set(lk for known in links_list for lk in known)

        # No horizontal limit needed here: Page.links was already bounded
        # by `width` when these links were collected.
        for link in links_list[-1]:
            # On the last level the links inside the pages are not needed.
            Pag = Page(link, 0 if last_level else width)

            # TODO: www.example.com and example.com still count as two
            # different links.
            for lk in Pag.links:
                if lk not in seen:
                    seen.add(lk)
                    links_level.append(lk)

            Pag.wordcount()
            res_lev = URLWords(Pag)
            # Empty when no word reaches `coherence` percent.
            res_lev.results = Pag.find_same_words(crawling[0][0], coherence)
            level.append(res_lev)

        links_list.append(links_level)
        crawling.append(level)

        database = db()
        try:
            for result in level:
                result.savefollow(database, depth)
        except Exception:
            logger.exception("could not save the results of level %d", depth)

        # links_list now holds one list more than the levels analysed,
        # hence the comparison with recursions + 1.
        if len(links_list) < (recursions + 1):
            analysis(links_list, top_mots, crawling, width, coherence,
                     recursions)
    return crawling