def link(cls):

        """
        Link documents -> institutions.

        Each institution URL is converted to a regex with `seed_to_regex`
        and grouped under the domain parsed from that URL. Each document's
        syllabus URL is then searched with the patterns registered for its
        own domain. When at least one pattern matches, a row is created that
        links the document to the institution whose pattern matched the
        longest piece of the URL (the first such institution wins a tie).

        Any exception raised while handling a single document is printed and
        the loop continues with the next document.
        """

        domain_to_inst = defaultdict(list)

        # Map domain -> [(regex, inst), ...]
        for inst in ServerSide(Institution.select()):

            domain = parse_domain(inst.url)

            regex = seed_to_regex(inst.url)

            domain_to_inst[domain].append((regex, inst))

        for doc in query_bar(Document.select()):

            try:

                # TODO: Get rid of @property.
                url = doc.syllabus.url

                # Read with .get() instead of indexing: indexing a
                # defaultdict would insert an empty list for every document
                # domain that has no institutions, growing the map as the
                # scan proceeds.
                candidates = domain_to_inst.get(parse_domain(url), ())

                # Find institutions with matching URLs.
                matches = []
                for pattern, inst in candidates:

                    match = pattern.search(url)

                    if match:
                        matches.append((match.group(), inst))

                if matches:

                    # Link to the institution with the longest match. max()
                    # keeps the first of several equally long matches, which
                    # is the same winner a stable descending sort would put
                    # in front.
                    _, best = max(matches, key=lambda x: len(x[0]))

                    cls.create(
                        institution=best,
                        document=doc,
                    )

            # Best effort: report the failure and move on to the next
            # document rather than aborting the whole run.
            except Exception as e:
                print(e)
    def ingest_world(cls,
        package='osp.institutions',
        path='data/world.csv',
    ):

        """
        Insert world (non-US) universities.

        Reads rows via `read_csv(package, path)`. Rows whose `country`
        column equals 'US' are skipped; every other row is inserted with
        `state` set to None. Rows that raise IntegrityError on insert are
        silently skipped.

        Args:
            package: First argument handed to `read_csv`.
            path: Second argument handed to `read_csv`.
        """

        for row in read_csv(package, path):

            # US rows are not inserted by this method.
            if row['country'] == 'US':
                continue

            # Normalize the URL and derive its domain.
            url = row['url'].strip()
            domain = parse_domain(url)

            # Cleaned column values, gathered as keyword arguments.
            fields = dict(
                name=row['name'].strip(),
                url=url,
                domain=domain,
                state=None,
                country=row['country'].strip(),
            )

            try:
                cls.create(**fields)

            except IntegrityError:
                # The insert was rejected; skip this row and keep going.
                continue
    def ingest_usa(cls,
        package='osp.institutions',
        path='data/usa.csv',
    ):

        """
        Insert US universities.

        Reads rows via `read_csv(package, path)`. Only rows whose
        `e_country` column equals 'USA' are inserted, always with
        `country` set to 'US'. Rows that raise IntegrityError on insert are
        silently skipped.

        Args:
            package: First argument handed to `read_csv`.
            path: Second argument handed to `read_csv`.
        """

        for row in read_csv(package, path):

            # Anything not marked 'USA' is ignored.
            if row['e_country'] != 'USA':
                continue

            # Normalize the URL and derive its domain.
            url = row['web_url'].strip()
            domain = parse_domain(url)

            # Cleaned column values, gathered as keyword arguments.
            fields = dict(
                name=row['biz_name'].strip(),
                url=url,
                domain=domain,
                state=row['e_state'].strip(),
                country='US',
            )

            try:
                cls.create(**fields)

            except IntegrityError:
                # The insert was rejected; skip this row and keep going.
                continue
    def domain(self):
        """
        Parse the domain out of the syllabus' URL.

        Returns:
            Whatever `parse_domain` produces for `self.url`.
        """

        url = self.url
        return parse_domain(url)
    def domain(self):

        """
        Parse the domain out of the syllabus' URL.

        Returns:
            Whatever `parse_domain` produces for `self.url`.
        """

        source_url = self.url
        parsed = parse_domain(source_url)
        return parsed
def test_parse_domain(url, domain):
    """Check that parse_domain(url) equals the expected domain."""
    parsed = parse_domain(url)
    assert parsed == domain
def test_parse_domain(url, domain):
    """Check that parse_domain(url) equals the expected domain."""
    actual = parse_domain(url)
    assert actual == domain