def bring_orgs_to_memory(self):
        """
        Load every organization from the DAO and shrink each one to a SmallOrganization.

        Only the id, URL and page-rank fields listed below are requested from the DAO.
        Organizations without an ``organization_url`` are skipped before conversion, and
        converted entries whose ``org_domain`` is falsy are dropped afterwards.

        Returns:
            small_orgs ([SmallOrganization]): The surviving organizations, in the order the
                DAO returned them.
        """

        # the only fields requested from the DAO
        wanted_fields = (
            "id",
            "page_rank_info.total",
            "organization_url",
            "page_rank_info.total_with_self",
            "page_rank_info.references.count",
            "page_rank_info.references.org_domain",
            "page_rank_info.references.pages.url",
            "page_rank_info.references.pages.count",
        )
        org_dtos = self.org_dao.all(*wanted_fields)

        # first pass: convert every dto that has a url into the smaller model
        converted = []
        for dto in org_dtos:
            if not dto.organization_url:
                continue
            converted.append(SmallOrganization(DTOConverter.from_dto(Organization, dto), dto.id))

        # second pass: keep only the entries that ended up with a domain
        small_orgs = []
        for small_org in converted:
            if small_org.org_domain:
                small_orgs.append(small_org)

        # Only the trimmed list is returned; the dto list is not referenced by it.
        return small_orgs
Exemple #2
0
    def test_org_dto_converter(self):
        """Round-trip an Organization through DTOConverter and compare every attribute.

        The model is converted to an OrganizationDTO and back again; after each
        conversion every attribute of the model is compared with the DTO.
        """
        own_reference = PageRankVector(
            org_domain='yoyodyne.com',
            count=2,
            pages=[UrlCountPair(url='http://www.yoyodyne.com/', count=2)])
        trystero_reference = PageRankVector(
            org_domain='trystero.org',
            count=4,
            pages=[
                UrlCountPair(url='http://www.yoyodyne.com/', count=3),
                UrlCountPair(url='http://www.yoyodyne.com/contacts.php', count=1),
            ])
        thurn_reference = PageRankVector(
            org_domain='thurnandtaxis.info',
            count=4,
            pages=[UrlCountPair(url='http://www.yoyodyne.com/', count=4)])
        rank_info = PageRankInfo(
            total_with_self=10,
            total=8,
            references=[own_reference, trystero_reference, thurn_reference])

        my_org = Organization(
            name='Yoyodyne, Inc.',
            address="1234 Yoyodyne Way, San Narciso, CA",
            types=[OrgTypesEnum.GOVERNMENT, OrgTypesEnum.RESEARCH],
            phone_numbers=["4026170423"],
            emails=["*****@*****.**"],
            contacts=[],
            organization_url="www.yoyodyne.com",
            page_rank_info=rank_info)

        def assert_matches_dto(model, dto):
            # page_rank_info is a nested object, so it is handed to the dedicated
            # comparer; everything else is compared attribute by attribute.
            for attr in model.__dict__:
                if attr == 'page_rank_info':
                    self._compare_page_rank_info(model, dto)
                    continue
                self.assertEqual(getattr(model, attr), getattr(dto, attr),
                                 "{0} attribute not equal".format(attr))

        print('Converting an organization model to a DTO.')
        org_dto = DTOConverter.to_dto(OrganizationDTO, my_org)

        print('Testing equality...')
        assert_matches_dto(my_org, org_dto)

        print('Converting a DTO to an organization.')
        my_org = DTOConverter.from_dto(Organization, org_dto)

        print('Testing equality...')
        assert_matches_dto(my_org, org_dto)
Exemple #3
0
    def test_contact_dto_converter(self):
        """Round-trip a Contact through DTOConverter and compare every attribute.

        The model is converted to a ContactDTO and back again; after each
        conversion every attribute of the model is compared with the DTO.
        """
        my_contact = Contact(first_name="Jordan",
                             last_name="Degner",
                             phones=['4029813230'],
                             email="*****@*****.**",
                             position="Software Engineer")

        def assert_matches_dto(model, dto):
            # compare each instance attribute of the model with the dto's value
            for attr in model.__dict__:
                self.assertEqual(getattr(model, attr),
                                 getattr(dto, attr),
                                 "{0} attribute not equal".format(attr))

        print('Converting a contact to a DTO.')
        contact_dto = DTOConverter.to_dto(ContactDTO, my_contact)

        print('Testing equality...')
        assert_matches_dto(my_contact, contact_dto)

        print('Converting a DTO to a contact.')
        my_contact = DTOConverter.from_dto(Contact, contact_dto)

        print('Testing equality...')
        assert_matches_dto(my_contact, contact_dto)
Exemple #4
0
    def test_item_converter(self):
        """Check that a scraped contact is converted with its organization attached.

        An organization DTO is stored through the OrganizationDAO, a ScrapedContact
        naming the same organization is converted with the ModelConverter, and the
        resulting contact's organization name is compared with the stored one.
        """
        app_ctx = ApplicationContext(TestableDAOContext())

        print('Creating organization and contact item.')
        org_dao = app_ctx.get_object('OrganizationDAO')
        org_dto = OrganizationDTO(name="Univerisityee of Nyeebraska-Lincoln")
        org_dao.create_update(org_dto)
        expected_org = DTOConverter.from_dto(Organization, org_dto)

        scraped = ScrapedContact(first_name='Bee',
                                 last_name='Yee',
                                 organization={'name': "Univerisityee of Nyeebraska-Lincoln"})

        print('Converting contact to model.')
        model_converter = app_ctx.get_object('ModelConverter')
        converted_contact = model_converter.to_model(Contact, scraped)

        self.assertEqual(converted_contact.organization.name, expected_org.name)
    def test_item_converter(self):
        """Convert a ScrapedContact to a Contact model and verify its organization name.

        The organization is first persisted via the OrganizationDAO so that the
        ModelConverter has a stored organization with the same name available.
        """
        context = ApplicationContext(TestableDAOContext())
        print('Creating organization and contact item.')

        # persist the organization the scraped contact refers to
        organization_dao = context.get_object('OrganizationDAO')
        stored_dto = OrganizationDTO(name="Univerisityee of Nyeebraska-Lincoln")
        organization_dao.create_update(stored_dto)
        org_model = DTOConverter.from_dto(Organization, stored_dto)

        item = ScrapedContact(
            first_name='Bee',
            last_name='Yee',
            organization={'name': "Univerisityee of Nyeebraska-Lincoln"},
        )

        print('Converting contact to model.')
        to_model = context.get_object('ModelConverter').to_model
        contact_model = to_model(Contact, item)

        self.assertEqual(contact_model.organization.name, org_model.name)
Exemple #6
0
def _monitor_cache(dao, max_size, cache, job_queue, job_cond, fill_cond, empty_cond,
                   req_doms, blk_doms, srt_list, logger_lock):
    """
    Loop forever, servicing fill and empty jobs for the URL cache.

    Each iteration takes one job from ``job_queue`` (waiting on ``job_cond`` for up to
    ``job_cond.wait(1)`` when none is queued) and then either fills ``cache`` from the
    DAO or drains it completely. This function never returns.

    Args:
        dao: Callable returning an object that provides ``findmany_by_domains``.
        max_size: Target cache size; ``max_size - cache.qsize()`` entries are requested
            on each fill.
        cache: Queue-like object (``qsize``/``put``/``get``) holding URLMetadata objects.
        job_queue: Queue-like object yielding ``CacheJobs`` values.
        job_cond: Condition guarding ``job_queue`` and used to wait for new jobs.
        fill_cond: Condition held while filling; all waiters are notified afterwards.
        empty_cond: Condition held while emptying; one waiter is notified afterwards.
        req_doms: Passed through to ``findmany_by_domains``.
        blk_doms: Passed through to ``findmany_by_domains``.
        srt_list: Passed through to ``findmany_by_domains``.
        logger_lock: Lock held around each logging call.
    """
    while True:
        try:
            with job_cond:
                next_job = job_queue.get(block=False)
        except Empty:
            # Nothing queued: wait on the condition, then poll once more before
            # starting the loop over.
            with job_cond:
                job_cond.wait(1)
                try:
                    next_job = job_queue.get(block=False)
                except Empty:
                    continue

        if next_job == CacheJobs.Fill:
            with logger_lock:
                logger.info('Filling the cache')
            with fill_cond:
                urls = dao().findmany_by_domains(max_size - cache.qsize(),
                                                 req_doms, blk_doms, srt_list)
                for u in urls:
                    url_obj = DTOConverter.from_dto(URLMetadata, u)
                    try:
                        # Non-blocking put. Assuming ``cache`` follows the standard
                        # Queue API, a default (blocking) put never raises Full, which
                        # left the handler below unreachable and could stall this
                        # worker while it holds fill_cond.
                        cache.put(url_obj, block=False)
                    except Full:
                        # cache is full; the remaining urls are not cached
                        break
                fill_cond.notify_all()

        elif next_job == CacheJobs.Empty:
            with logger_lock:
                logger.info('Emptying the cache')
            with empty_cond:
                # drain until the cache reports it is empty
                while True:
                    try:
                        cache.get(block=False)
                    except Empty:
                        break
                empty_cond.notify()
    def test_contact_dto_converter(self):
        """Convert a Contact to a ContactDTO and back, checking attributes each way."""
        contact_fields = dict(
            first_name="Jordan",
            last_name="Degner",
            phones=['4029813230'],
            email="*****@*****.**",
            position="Software Engineer",
        )
        my_contact = Contact(**contact_fields)

        print('Converting a contact to a DTO.')
        contact_dto = DTOConverter.to_dto(ContactDTO, my_contact)

        print('Testing equality...')
        # every instance attribute of the model must match the dto
        for field in my_contact.__dict__:
            model_value = getattr(my_contact, field)
            dto_value = getattr(contact_dto, field)
            self.assertEqual(model_value, dto_value,
                             "{0} attribute not equal".format(field))

        print('Converting a DTO to a contact.')
        my_contact = DTOConverter.from_dto(Contact, contact_dto)

        print('Testing equality...')
        # same check against the model rebuilt from the dto
        for field in my_contact.__dict__:
            model_value = getattr(my_contact, field)
            dto_value = getattr(contact_dto, field)
            self.assertEqual(model_value, dto_value,
                             "{0} attribute not equal".format(field))
    def test_page_rank(self):
        """Run the page-rank pipeline end to end and compare the stored organizations.

        Organizations are loaded and cleaned by the PageRankPreprocessor, turned into
        a matrix, passed through ``google_matrix`` and ``left_eigenvector``, ranked and
        stored by the PageRankPostprocessor, then read back from the OrganizationDAO
        and compared with ``self.assert_models``.
        """
        print('Creating PageRankPreprocessor')
        preprocessor = self.ctx.get_object('PageRankPreprocessor')

        print('Bring organizations to memory')
        organizations = preprocessor.bring_orgs_to_memory()

        print('Cleaning organizations')
        organizations = preprocessor.cleanup_data(organizations)

        print('Creating dat matrix')
        rank_matrix = preprocessor.create_matrix(organizations)
        self.assertIsNotNone(rank_matrix)

        print('Creating the dampened google matrix')
        rank_matrix = google_matrix(rank_matrix)

        print('Generating eigenvector')
        eigenvector = left_eigenvector(rank_matrix)

        print('Creating PageRankPostprocessor')
        postprocessor = self.ctx.get_object('PageRankPostprocessor')

        print('Assigning ranks to organizations')
        organizations = postprocessor.give_orgs_ranks(organizations, eigenvector)

        print('Storing organizations')
        postprocessor.store_organizations(organizations)

        # read everything back from the DAO as models
        org_dao = self.ctx.get_object('OrganizationDAO')
        stored_models = []
        for stored_dto in org_dao.all():
            stored_models.append(DTOConverter.from_dto(Organization, stored_dto))

        self.assertEqual(len(stored_models), len(self.assert_models),
                         "Error: returned model number different than expected")

        for expected_model in self.assert_models:
            self._compare_assert_against_test(expected_model, stored_models)
    def test_org_dto_converter(self):
        """Convert an Organization to an OrganizationDTO and back, checking attributes.

        ``page_rank_info`` is compared through ``_compare_page_rank_info``; all other
        attributes are compared directly with ``assertEqual``.
        """
        # page lists for the three page-rank references
        own_pages = [
            UrlCountPair(url='http://www.yoyodyne.com/', count=2),
        ]
        trystero_pages = [
            UrlCountPair(url='http://www.yoyodyne.com/', count=3),
            UrlCountPair(url='http://www.yoyodyne.com/contacts.php', count=1),
        ]
        thurn_pages = [
            UrlCountPair(url='http://www.yoyodyne.com/', count=4),
        ]
        references = [
            PageRankVector(org_domain='yoyodyne.com', count=2, pages=own_pages),
            PageRankVector(org_domain='trystero.org', count=4, pages=trystero_pages),
            PageRankVector(org_domain='thurnandtaxis.info', count=4, pages=thurn_pages),
        ]

        my_org = Organization(
            name='Yoyodyne, Inc.',
            address="1234 Yoyodyne Way, San Narciso, CA",
            types=[OrgTypesEnum.GOVERNMENT, OrgTypesEnum.RESEARCH],
            phone_numbers=["4026170423"],
            emails=["*****@*****.**"],
            contacts=[],
            organization_url="www.yoyodyne.com",
            page_rank_info=PageRankInfo(total_with_self=10, total=8, references=references),
        )

        print('Converting an organization model to a DTO.')
        org_dto = DTOConverter.to_dto(OrganizationDTO, my_org)

        print('Testing equality...')
        for field in my_org.__dict__:
            if field != 'page_rank_info':
                self.assertEqual(getattr(my_org, field), getattr(org_dto, field),
                                 "{0} attribute not equal".format(field))
                continue
            # nested page-rank data goes through its own comparer
            self._compare_page_rank_info(my_org, org_dto)

        print('Converting a DTO to an organization.')
        my_org = DTOConverter.from_dto(Organization, org_dto)

        print('Testing equality...')
        for field in my_org.__dict__:
            if field != 'page_rank_info':
                self.assertEqual(getattr(my_org, field), getattr(org_dto, field),
                                 "{0} attribute not equal".format(field))
                continue
            # nested page-rank data goes through its own comparer
            self._compare_page_rank_info(my_org, org_dto)