Ejemplo n.º 1
0
    def _merge_page_rank_info(self, new_references, existing_references, organization_url):
        """Fold newly gathered reference counts into the stored reference data.

        ``existing_references`` is updated in place and returned. When it is
        ``None`` there is nothing to merge into, so ``new_references`` is
        returned untouched.

        Bookkeeping performed on ``existing_references``:
          * ``total_with_self`` grows by every added or changed count.
          * ``total`` grows only when the referenced organization's domain
            differs from the domain derived from ``organization_url``.

        :param new_references: object exposing ``references``; each entry has
            ``org_domain``, ``count`` and ``pages`` (entries with ``url`` and
            ``count``).
        :param existing_references: previously stored object of the same
            shape (also carrying ``total`` and ``total_with_self``), or None.
        :param organization_url: URL of the organization the references
            originate from; its domain is used to detect self-references.
        :return: the merged reference object.
        """
        if existing_references is None:
            return new_references

        own_domain = UrlUtility().get_domain(organization_url)

        def add_to_totals(target_ref, delta):
            # Apply a count change to one stored reference and the aggregates.
            target_ref.count += delta
            existing_references.total_with_self += delta
            if target_ref.org_domain != own_domain:
                # Self-references never contribute to ``total``.
                existing_references.total += delta

        for incoming in new_references.references:
            # Locate stored data for the same target organization, if any.
            for stored in existing_references.references:
                if incoming.org_domain == stored.org_domain:
                    break
            else:
                # First time this target organization is referenced: adopt
                # the whole incoming entry as-is.
                existing_references.references.append(incoming)
                existing_references.total_with_self += incoming.count
                if incoming.org_domain != own_domain:
                    existing_references.total += incoming.count
                continue

            for page in incoming.pages:
                # Look for stored data coming from this exact URL.
                for known_page in stored.pages:
                    if page.url == known_page.url:
                        delta = page.count - known_page.count
                        if delta != 0:
                            # A differing count means the page changed, so
                            # bring the stored numbers up to date.
                            known_page.count = page.count
                            add_to_totals(stored, delta)
                        break
                else:
                    # The organization was already referenced, but never
                    # from this URL.
                    stored.pages.append(page)
                    add_to_totals(stored, page.count)
        return existing_references
Ejemplo n.º 2
0
    def clean_url(self):
        """Validate the submitted ``url`` and reject already-known domains.

        The domain is extracted from ``self.cleaned_data['url']`` and looked
        up in both the organization DAO and the URL metadata DAO.

        NOTE(review): the ``cleaned_data`` / ``ValidationError`` usage looks
        like a Django form field cleaner -- confirm against the enclosing
        class.

        Returns:
            The ``url`` value from ``self.cleaned_data``, unchanged.

        Raises:
            ValidationError: If extracting the domain fails, or if either DAO
                returns a truthy result for that domain.
        """
        url = self.cleaned_data['url']
        ctx = ApplicationContext(DAOContext())
        org_dao = ctx.get_object('OrganizationDAO')
        url_metadata_dao = ctx.get_object('URLMetadataDAO')

        try:
            domain = UrlUtility().get_domain(url)
        except Exception:
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt / SystemExit propagate instead of being
            # reported to the user as a validation problem.
            raise ValidationError(
                "Oops! We couldn't find information on that domain.")

        if org_dao.find(organization_url=domain) or url_metadata_dao.find(
                domain=domain):
            raise ValidationError(
                "Oops! Looks like we already have information on that organization."
            )

        return url