def _merge_page_rank_info(self, new_references, existing_references, organization_url):
    """Fold newly gathered reference counts into the already stored ones.

    ``existing_references`` is updated in place and returned. When it is
    ``None`` there is nothing to merge into, so ``new_references`` is
    returned untouched.

    For every reference entry in ``new_references.references``:

    * if no stored entry has the same ``org_domain``, the new entry is
      appended as-is and its ``count`` is added to the totals;
    * otherwise each of its pages is matched by ``url`` against the stored
      entry's pages; unknown pages are appended, known pages have their
      ``count`` refreshed, and the difference is applied to the totals.

    ``total_with_self`` always receives the change, while ``total`` only
    receives it when the referenced domain differs from the domain derived
    from ``organization_url``.
    """
    if existing_references is None:
        return new_references

    own_domain = UrlUtility().get_domain(organization_url)

    for incoming in new_references.references:
        # First stored entry that targets the same organization, if any.
        stored = next(
            (candidate for candidate in existing_references.references
             if incoming.org_domain == candidate.org_domain),
            None,
        )

        if stored is None:
            # No references to this organization recorded so far: adopt
            # the whole entry.
            existing_references.references.append(incoming)
            existing_references.total_with_self += incoming.count
            if incoming.org_domain != own_domain:
                existing_references.total += incoming.count
            continue

        for page in incoming.pages:
            # First stored page with the same URL, if any.
            known_page = next(
                (candidate for candidate in stored.pages
                 if page.url == candidate.url),
                None,
            )

            if known_page is None:
                # The organization is known, but this URL has not
                # contributed references before.
                stored.pages.append(page)
                delta = page.count
            else:
                delta = page.count - known_page.count
                if delta == 0:
                    # Same number of references as before: nothing to do.
                    continue
                # The page changed; refresh its stored count.
                known_page.count = page.count

            stored.count += delta
            existing_references.total_with_self += delta
            if stored.org_domain != own_domain:
                # Self-references are excluded from this total.
                existing_references.total += delta

    return existing_references
def clean_url(self):
    """Validate the submitted ``url`` value and return it unchanged.

    The value is read from ``self.cleaned_data['url']``. A domain is derived
    from it with ``UrlUtility().get_domain``; the URL is rejected when that
    fails, or when either the organization DAO or the URL-metadata DAO
    already returns something for that domain.

    NOTE(review): the ``clean_<field>`` name and ``cleaned_data`` suggest
    this is a Django form field hook -- confirm against the enclosing class.

    Returns:
        The original ``url`` value when it passes both checks.

    Raises:
        ValidationError: if no domain can be derived from the URL, or if
            information for that domain is already stored.
    """
    url = self.cleaned_data['url']
    ctx = ApplicationContext(DAOContext())
    org_dao = ctx.get_object('OrganizationDAO')
    url_metadata_dao = ctx.get_object('URLMetadataDAO')

    try:
        domain = UrlUtility().get_domain(url)
    except Exception:
        # Catch ``Exception`` rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not turned into a form error.
        raise ValidationError(
            "Oops! We couldn't find information on that domain.")

    already_known = (org_dao.find(organization_url=domain)
                     or url_metadata_dao.find(domain=domain))
    if already_known:
        raise ValidationError(
            "Oops! Looks like we already have information on that organization."
        )

    return url