Esempio n. 1
0
 def __init__(self, query):
     """
     Set up a new instance around a search query.

     The query is kept on the instance, a database connection is obtained
     through get_database_connection(), and elapsed_time starts as None.
     :param query: string, the search query to use.
     """
     self.query = query
     # Obtain the connection first, then attach it to the instance.
     connection = get_database_connection()
     self.db = connection
     # No timing information is available at construction time.
     self.elapsed_time = None
Esempio n. 2
0
def get_allowed_domains():
    """
    Get a list of the domains that are allowed to be indexed.

    The domains are the distinct values of the ``domain`` column in the
    ``urls`` table. Each domain is a FQDN without the scheme or slashes.
    For example: 'hsleiden.nl'

    Rows are accessed by column name, so the connection is assumed to
    produce dict-like rows (verify against get_database_connection).
    :return: list
    """
    db = get_database_connection()
    cursor = db.cursor()
    try:
        cursor.execute("select distinct domain from urls")
        # Read the rows before committing: not every driver or cursor type
        # keeps pending results available once the transaction has ended.
        rows = cursor.fetchall()
    finally:
        # Always release the cursor, also when the query itself fails.
        cursor.close()
    # Commit so the connection is not left inside an open transaction.
    db.commit()
    return [row['domain'] for row in rows]
Esempio n. 3
0
from urllib.parse import urlparse, urlunparse
from retrouve.database.model import Model, get_database_connection
import psycopg2

# Module-level database handle, obtained through get_database_connection()
# as a side effect of importing this module.
db = get_database_connection()


class Url(Model):
    """
    Represents a URL as it is stored in the database.
    """

    def __init__(self, **kwargs):
        """
        Construct the URL, and parse the URL into parts right away.
        :param kwargs:
        """
        super().__init__(**kwargs)
        if hasattr(self, 'url'):
            self.parse_url()

    def parse_url(self):
        """
        Parse the URL into its components, using a base URL when possible.
        The URL components are stored in the internal __parts property.
        """
        self.__parts = urlparse(self.url)
        if hasattr(self, 'base'):
            if isinstance(self.base, str):
                self.base = urlparse(self.base)
            elif isinstance(self.base, Url):