Example #1
0
def add_urls_to_db(session, url, visited, found=None, target_id=None):
    """Store a single URL record through the given session.

    Nothing is written when ``is_url`` rejects the value.

    :param session: DB session; ``merge`` and ``commit`` are called on it
        (presumably a SQLAlchemy session -- confirm against callers)
    :param url: URL to be added
    :type url: `str`
    :param visited: Visited or not
    :type visited: `bool`
    :param found: True/False; not read anywhere in this function
    :type found: `bool`
    :param target_id: Target ID
    :type target_id: `int`
    :return: None
    :rtype: None
    """
    if not is_url(url):
        return
    # Surrounding whitespace has to go before the value is persisted,
    # otherwise nasty bugs can show up later
    cleaned = url.strip()
    in_scope = is_url_in_scope(cleaned)
    record = models.Url(
        target_id=target_id,
        url=cleaned,
        visited=visited,
        scope=in_scope,
    )
    session.merge(record)
    session.commit()
Example #2
0
def import_processed_url(session, urls_list, target_id=None):
    """Merge already-processed URL entries into the session and commit once.

    :param session: DB session; ``merge`` and ``commit`` are called on it
    :param urls_list: Iterable of ``(url, visited, scope)`` triples
    :type urls_list: `list`
    :param target_id: Target ID applied to every entry
    :type target_id: `int`
    :return: None
    :rtype: None
    """
    for entry in urls_list:
        address, was_visited, in_scope = entry
        record = models.Url(
            target_id=target_id,
            url=address,
            visited=was_visited,
            scope=in_scope,
        )
        session.merge(record)
    # A single commit covers the whole batch
    session.commit()
Example #3
0
def import_urls(session, url_list, target_id=None):
    """Merge every entry of ``url_list`` accepted by ``is_url`` and commit.

    ``url_list`` is iterated item by item, so each item is expected to be
    one URL string; items rejected by ``is_url`` are skipped.

    :param session: DB session; ``merge`` and ``commit`` are called on it
    :param url_list: List of urls
    :type url_list: `list`
    :param target_id: target ID
    :type target_id: `int`
    :return: The accepted URLs, in input order
    :rtype: `list`
    """
    accepted = []
    for candidate in url_list:
        if not is_url(candidate):
            continue
        accepted.append(candidate)
        record = models.Url(url=candidate, target_id=target_id)
        session.merge(record)
    # One commit for the whole batch, issued even when nothing was accepted
    session.commit()
    return accepted