def bring_orgs_to_memory(self):
    """
    Load all organizations from the database as compact SmallOrganization objects.

    Only the fields listed in ``wanted_fields`` are requested from the DAO.
    DTOs without an ``organization_url`` are skipped, and converted objects
    whose ``org_domain`` is falsy are dropped.

    Returns:
        small_orgs ([SmallOrganization]): The remaining organizations,
            converted to SmallOrganization objects to save some space.
    """
    # Fields requested from the DAO for every organization.
    wanted_fields = (
        "id",
        "page_rank_info.total",
        "organization_url",
        "page_rank_info.total_with_self",
        "page_rank_info.references.count",
        "page_rank_info.references.org_domain",
        "page_rank_info.references.pages.url",
        "page_rank_info.references.pages.count",
    )
    org_dtos = self.org_dao.all(*wanted_fields)

    # Convert each usable DTO to the smaller model, keeping only those with a domain.
    small_orgs = []
    for dto in org_dtos:
        if not dto.organization_url:
            continue
        compact = SmallOrganization(DTOConverter.from_dto(Organization, dto), dto.id)
        if not compact.org_domain:
            continue
        small_orgs.append(compact)

    # The DTOs are no longer referenced once this returns; only the small models remain.
    return small_orgs
def test_org_dto_converter(self):
    """Round-trip an Organization through DTOConverter and compare attributes each way."""

    def assert_matches_dto(model, dto):
        # page_rank_info is compared by the dedicated helper; everything else attribute by attribute.
        for attr in model.__dict__:
            if attr == 'page_rank_info':
                self._compare_page_rank_info(model, dto)
                continue
            self.assertEqual(getattr(model, attr), getattr(dto, attr),
                             "{0} attribute not equal".format(attr))

    # Build the page-rank fixture from its parts.
    own_domain_refs = PageRankVector(
        org_domain='yoyodyne.com',
        count=2,
        pages=[UrlCountPair(url='http://www.yoyodyne.com/', count=2)],
    )
    trystero_refs = PageRankVector(
        org_domain='trystero.org',
        count=4,
        pages=[
            UrlCountPair(url='http://www.yoyodyne.com/', count=3),
            UrlCountPair(url='http://www.yoyodyne.com/contacts.php', count=1),
        ],
    )
    thurn_refs = PageRankVector(
        org_domain='thurnandtaxis.info',
        count=4,
        pages=[UrlCountPair(url='http://www.yoyodyne.com/', count=4)],
    )
    rank_info = PageRankInfo(
        total_with_self=10,
        total=8,
        references=[own_domain_refs, trystero_refs, thurn_refs],
    )

    my_org = Organization(
        name='Yoyodyne, Inc.',
        address="1234 Yoyodyne Way, San Narciso, CA",
        types=[OrgTypesEnum.GOVERNMENT, OrgTypesEnum.RESEARCH],
        phone_numbers=["4026170423"],
        emails=["*****@*****.**"],
        contacts=[],
        organization_url="www.yoyodyne.com",
        page_rank_info=rank_info,
    )

    print('Converting an organization model to a DTO.')
    org_dto = DTOConverter.to_dto(OrganizationDTO, my_org)

    print('Testing equality...')
    assert_matches_dto(my_org, org_dto)

    print('Converting a DTO to an organization.')
    my_org = DTOConverter.from_dto(Organization, org_dto)

    print('Testing equality...')
    assert_matches_dto(my_org, org_dto)
def test_contact_dto_converter(self):
    """Round-trip a Contact through DTOConverter and compare attributes each way."""

    def assert_matches_dto(model, dto):
        # Every attribute stored on the model must equal the same-named attribute on the DTO.
        for attr in model.__dict__:
            self.assertEqual(getattr(model, attr), getattr(dto, attr),
                             "{0} attribute not equal".format(attr))

    my_contact = Contact(
        first_name="Jordan",
        last_name="Degner",
        phones=['4029813230'],
        email="*****@*****.**",
        position="Software Engineer",
    )

    print('Converting a contact to a DTO.')
    contact_dto = DTOConverter.to_dto(ContactDTO, my_contact)

    print('Testing equality...')
    assert_matches_dto(my_contact, contact_dto)

    print('Converting a DTO to a contact.')
    my_contact = DTOConverter.from_dto(Contact, contact_dto)

    print('Testing equality...')
    assert_matches_dto(my_contact, contact_dto)
def test_item_converter(self):
    """Convert a scraped contact to a model and check its organization name against the stored one."""
    org_name = "Univerisityee of Nyeebraska-Lincoln"
    ctx = ApplicationContext(TestableDAOContext())

    print('Creating organization and contact item.')
    org_dao = ctx.get_object('OrganizationDAO')
    org_dto = OrganizationDTO(name=org_name)
    org_dao.create_update(org_dto)
    org_model = DTOConverter.from_dto(Organization, org_dto)
    contact_item = ScrapedContact(
        first_name='Bee',
        last_name='Yee',
        organization={'name': org_name},
    )

    print('Converting contact to model.')
    model_converter = ctx.get_object('ModelConverter')
    model_contact = model_converter.to_model(Contact, contact_item)

    # The converted contact should carry the same organization name as the stored organization.
    self.assertEqual(model_contact.organization.name, org_model.name)
def test_item_converter(self):
    """Check that ModelConverter gives a scraped contact the stored organization's name."""
    app_ctx = ApplicationContext(TestableDAOContext())

    print('Creating organization and contact item.')
    organization_dao = app_ctx.get_object('OrganizationDAO')
    stored_dto = OrganizationDTO(name="Univerisityee of Nyeebraska-Lincoln")
    organization_dao.create_update(stored_dto)
    expected_org = DTOConverter.from_dto(Organization, stored_dto)

    # The scraped item refers to its organization by name only.
    scraped_org = {'name': "Univerisityee of Nyeebraska-Lincoln"}
    scraped = ScrapedContact(first_name='Bee', last_name='Yee', organization=scraped_org)

    print('Converting contact to model.')
    to_model = app_ctx.get_object('ModelConverter').to_model
    converted = to_model(Contact, scraped)

    self.assertEqual(converted.organization.name, expected_org.name)
def _monitor_cache(dao, max_size, cache, job_queue, job_cond, fill_cond, empty_cond, req_doms, blk_doms,
                   srt_list, logger_lock):
    """
    Worker loop that services fill/empty jobs for the URL cache. Never returns.

    Jobs are read from ``job_queue``. When the queue is empty the loop waits on
    ``job_cond`` for up to one second and tries again.

    * ``CacheJobs.Fill``: fetch up to ``max_size - cache.qsize()`` records via
      ``dao().findmany_by_domains(...)``, convert each to a ``URLMetadata`` and
      put it on ``cache``, then wake every waiter on ``fill_cond``.
    * ``CacheJobs.Empty``: drain ``cache`` until it reports empty, then wake one
      waiter on ``empty_cond``.

    Arguments:
        dao (callable): Called with no arguments to obtain the DAO for each fill.
        max_size (int): Target number of items in the cache.
        cache: Queue holding the cached ``URLMetadata`` objects.
        job_queue: Queue of ``CacheJobs`` values to process.
        job_cond: Condition held while reading ``job_queue`` and waited on when it is empty.
        fill_cond: Condition held during a fill and notified once it finishes.
        empty_cond: Condition held during an empty and notified once it finishes.
        req_doms, blk_doms, srt_list: Passed through unchanged to ``findmany_by_domains``
            (presumably required domains, blocked domains and sort keys -- confirm against the DAO).
        logger_lock: Lock held around each log call.
    """
    while True:
        # Fetch the next job without blocking; on an empty queue, wait briefly and retry once.
        try:
            with job_cond:
                next_job = job_queue.get(block=False)
        except Empty:
            with job_cond:
                job_cond.wait(1)
                try:
                    next_job = job_queue.get(block=False)
                except Empty:
                    continue

        if next_job == CacheJobs.Fill:
            with logger_lock:
                logger.info('Filling the cache')
            with fill_cond:
                urls = dao().findmany_by_domains(max_size - cache.qsize(), req_doms, blk_doms, srt_list)
                for u in urls:
                    url_obj = DTOConverter.from_dto(URLMetadata, u)
                    try:
                        # Non-blocking put: the default blocking put never raises Full, so the
                        # handler below was unreachable and a full cache would stall this loop
                        # while fill_cond is held.
                        cache.put(url_obj, block=False)
                    except Full:
                        break
                fill_cond.notify_all()
        elif next_job == CacheJobs.Empty:
            with logger_lock:
                logger.info('Emptying the cache')
            with empty_cond:
                # Discard items until the cache reports empty.
                while True:
                    try:
                        cache.get(block=False)
                    except Empty:
                        break
                empty_cond.notify()
def test_contact_dto_converter(self):
    """Verify Contact -> ContactDTO -> Contact conversion keeps every attribute equal."""
    contact_fields = dict(
        first_name="Jordan",
        last_name="Degner",
        phones=['4029813230'],
        email="*****@*****.**",
        position="Software Engineer",
    )
    my_contact = Contact(**contact_fields)

    print('Converting a contact to a DTO.')
    contact_dto = DTOConverter.to_dto(ContactDTO, my_contact)

    print('Testing equality...')
    for attr in vars(my_contact):
        model_value = getattr(my_contact, attr)
        dto_value = getattr(contact_dto, attr)
        self.assertEqual(model_value, dto_value, "{0} attribute not equal".format(attr))

    print('Converting a DTO to a contact.')
    my_contact = DTOConverter.from_dto(Contact, contact_dto)

    print('Testing equality...')
    for attr in vars(my_contact):
        model_value = getattr(my_contact, attr)
        dto_value = getattr(contact_dto, attr)
        self.assertEqual(model_value, dto_value, "{0} attribute not equal".format(attr))
def test_page_rank(self):
    """Run the page-rank pipeline end to end and compare the stored organizations with the expected models."""
    print('Creating PageRankPreprocessor')
    preprocessor = self.ctx.get_object('PageRankPreprocessor')

    print('Bring organizations to memory')
    orgs = preprocessor.bring_orgs_to_memory()

    print('Cleaning organizations')
    orgs = preprocessor.cleanup_data(orgs)

    print('Creating dat matrix')
    link_matrix = preprocessor.create_matrix(orgs)
    self.assertIsNotNone(link_matrix)

    print('Creating the dampened google matrix')
    dampened = google_matrix(link_matrix)

    print('Generating eigenvector')
    rank_vector = left_eigenvector(dampened)

    print('Creating PageRankPostprocessor')
    postprocessor = self.ctx.get_object('PageRankPostprocessor')

    print('Assigning ranks to organizations')
    orgs = postprocessor.give_orgs_ranks(orgs, rank_vector)

    print('Storing organizations')
    postprocessor.store_organizations(orgs)

    # Read everything back from the DAO and compare against the expected models.
    org_dao = self.ctx.get_object('OrganizationDAO')
    new_org_models = []
    for dto in org_dao.all():
        new_org_models.append(DTOConverter.from_dto(Organization, dto))

    self.assertEqual(len(new_org_models), len(self.assert_models),
                     "Error: returned model number different than expected")

    for expected in self.assert_models:
        self._compare_assert_against_test(expected, new_org_models)
def test_org_dto_converter(self):
    """Verify Organization -> OrganizationDTO -> Organization conversion keeps every attribute equal."""
    # Build the page-rank fixture bottom-up: pages, then per-domain vectors, then the summary.
    home_page = 'http://www.yoyodyne.com/'
    contacts_page = 'http://www.yoyodyne.com/contacts.php'
    references = [
        PageRankVector(org_domain='yoyodyne.com', count=2,
                       pages=[UrlCountPair(url=home_page, count=2)]),
        PageRankVector(org_domain='trystero.org', count=4,
                       pages=[UrlCountPair(url=home_page, count=3),
                              UrlCountPair(url=contacts_page, count=1)]),
        PageRankVector(org_domain='thurnandtaxis.info', count=4,
                       pages=[UrlCountPair(url=home_page, count=4)]),
    ]
    rank_info = PageRankInfo(total_with_self=10, total=8, references=references)

    my_org = Organization(name='Yoyodyne, Inc.',
                          address="1234 Yoyodyne Way, San Narciso, CA",
                          types=[OrgTypesEnum.GOVERNMENT, OrgTypesEnum.RESEARCH],
                          phone_numbers=["4026170423"],
                          emails=["*****@*****.**"],
                          contacts=[],
                          organization_url="www.yoyodyne.com",
                          page_rank_info=rank_info)

    print('Converting an organization model to a DTO.')
    org_dto = DTOConverter.to_dto(OrganizationDTO, my_org)

    print('Testing equality...')
    for attr in vars(my_org):
        if attr != 'page_rank_info':
            self.assertEqual(getattr(my_org, attr), getattr(org_dto, attr),
                             "{0} attribute not equal".format(attr))
        else:
            # Nested page-rank data is checked by the dedicated comparison helper.
            self._compare_page_rank_info(my_org, org_dto)

    print('Converting a DTO to an organization.')
    my_org = DTOConverter.from_dto(Organization, org_dto)

    print('Testing equality...')
    for attr in vars(my_org):
        if attr != 'page_rank_info':
            self.assertEqual(getattr(my_org, attr), getattr(org_dto, attr),
                             "{0} attribute not equal".format(attr))
        else:
            self._compare_page_rank_info(my_org, org_dto)