def add_urls_to_db(session, url, visited, found=None, target_id=None):
    """Store a single URL for a target, provided it passes ``is_url``.

    When the value is rejected by ``is_url`` nothing is merged and nothing
    is committed.

    :param session: DB session; must provide ``merge()`` and ``commit()``
        (presumably a SQLAlchemy session -- confirm against callers)
    :param url: URL to be added
    :type url: `str`
    :param visited: Visited or not
    :type visited: `bool`
    :param found: Currently unused by this function
    :type found: `bool`
    :param target_id: Target ID
    :type target_id: `int`
    :return: None
    :rtype: None
    """
    if not is_url(url):
        return

    # Surrounding whitespace must never reach the DB: stray spaces/newlines
    # in stored URLs lead to nasty, hard-to-trace bugs.
    # NOTE(review): validation above runs on the raw value and stripping
    # happens only afterwards -- confirm is_url tolerates padded input.
    cleaned_url = url.strip()
    record = models.Url(
        target_id=target_id,
        url=cleaned_url,
        visited=visited,
        scope=is_url_in_scope(cleaned_url),
    )
    session.merge(record)
    session.commit()
def import_processed_url(session, urls_list, target_id=None):
    """Store already-classified URLs for a target.

    Every entry is merged into the session as a ``models.Url`` row; a single
    commit is issued once all entries have been merged (also when the list
    is empty).

    :param session: DB session; must provide ``merge()`` and ``commit()``
        (presumably a SQLAlchemy session -- confirm against callers)
    :param urls_list: List of ``(url, visited, scope)`` triples
    :type urls_list: `list`
    :param target_id: Target ID
    :type target_id: `int`
    :return: None
    :rtype: None
    """
    for entry in urls_list:
        # Each entry must unpack into exactly three values.
        url, visited, scope = entry
        record = models.Url(
            target_id=target_id,
            url=url,
            visited=visited,
            scope=scope,
        )
        session.merge(record)
    session.commit()
def import_urls(session, url_list, target_id=None):
    """Store every valid URL from ``url_list`` and report which were kept.

    ``url_list`` is iterated element by element, so each element should be
    one URL string (a single plain string would be walked character by
    character). Elements rejected by ``is_url`` are skipped. Accepted URLs
    are merged as given, and one commit is issued after the loop.

    :param session: DB session; must provide ``merge()`` and ``commit()``
        (presumably a SQLAlchemy session -- confirm against callers)
    :param url_list: List of urls
    :type url_list: `list`
    :param target_id: target ID
    :type target_id: `int`
    :return: List of imported URLS, in input order
    :rtype: `list`
    """
    accepted = []
    for candidate in url_list:
        if not is_url(candidate):
            continue
        accepted.append(candidate)
        session.merge(models.Url(url=candidate, target_id=target_id))
    session.commit()
    return accepted