def create_filled_repository_from_scratch(self, sources=2):
        """Build a test repository from the bundled data and dump it to SQL.

        The 'knaw' source is always loaded; 'knaw2' is loaded as well when
        ``sources`` is greater than 1. A 'bioport' source is then registered
        with quality 10000, and the ``bioport_test`` database is dumped to
        SQLDUMP_FILENAME with ``mysqldump``.

        arguments:
            sources - number of test sources to load (only "> 1" is checked)
        returns:
            self.repo, after setting self._is_filled to True
        """
        # create a repo filled with some data
        self.repo.db.metadata.create_all()

        source_ids = [u'knaw']
        if sources > 1:
            source_ids.append(u'knaw2')
        for source_id in source_ids:
            list_path = os.path.join(THIS_DIR, 'data/%s/list.xml' % source_id)
            url = 'file://%s' % os.path.abspath(list_path)
            source = Source(id=source_id, url=url, description='test')
            self.repo.add_source(source)
            self.repo.download_biographies(source)
        self.repo.db._update_category_table()

        # also add Bioport source
        src = Source('bioport', repository=self.repo)
        self.repo.add_source(src)
        src.set_quality(10000)

        # make_url is the public entry point for parsing a DSN string; the
        # private _parse_rfc1738_args helper is not a stable API.
        dsn = sqlalchemy.engine.url.make_url(DSN)
        username = dsn.username or ""
        passwd = dsn.password or ""

        # NOTE(review): username and password are interpolated into the shell
        # command unquoted, and the database name is hard-coded rather than
        # taken from DSN - confirm this is acceptable for the test setup.
        credentials = '-u %s' % username
        if passwd:
            credentials += ' -p%s' % passwd
        sh('mysqldump %s bioport_test > %s' % (credentials, SQLDUMP_FILENAME))

        self._is_filled = True
        return self.repo
    def get_bioport_biography(self, person, create_if_not_exists=True):
        """get, or if it does not yet exist, create, a biodes document that represents the interventions
        of the editors in the biographical portal

        arguments:
            person - an instance of Person
            create_if_not_exists - if True (the default), create a new bioport
                biography when none is found
        returns:
            the first biography found for the person in the bioport source;
            otherwise a newly created one if create_if_not_exists is true,
            else None
        """
        source = BioPortSource()

        # register the bioport source on first use
        known_source_ids = [s.id for s in self.get_sources()]
        if source.id not in known_source_ids:
            src = Source('bioport', repository=self)
            self.add_source(src)
            src.set_quality(10000)

        # get_biographies may return a generator, so materialize it first
        biographies = list(
            self.get_biographies(source=source, bioport_id=person.get_bioport_id())
        )

        if biographies:
            # More than one bioport biography may exist for a person; this is
            # not treated as an error and the first result is returned as-is.
            # NOTE(review): which one comes first depends on the ordering of
            # get_biographies - confirm that is the intended choice.
            return biographies[0]

        if create_if_not_exists:
            # create a new biography
            return self._create_bioport_biography(person)
        return None
 def test_set_quality(self):
     """Check the default quality of saved sources and the effect of set_quality()."""
     repo = self.repo
     existing = repo.get_sources()

     # every newly saved source is expected to start out with quality 0
     added = []
     for source_id in (u'test1', u'test2', u'test3', u'test4'):
         new_source = Source(id=source_id, url='x', repository=repo)
         repo.save(new_source)
         self.assertEqual(new_source.quality, 0)
         added.append(new_source)
     src1, src2, src3, src4 = added

     # new sources are listed after the pre-existing ones, in insertion order
     self.assertEqual(repo.get_sources(), existing + added)

     # we say that src2 should have lowest quality
     src2.set_quality(0)
     src1.set_quality(1)
     src4.set_quality(2)
     self.assertEqual(
         src4.quality,
         2,
         [(src.id, src.quality) for src in repo.get_sources()],
     )

     # a requested quality of 9 is expected to end up as (number of sources - 1)
     src3.set_quality(9)
     self.assertEqual(src3.quality, len(repo.get_sources()) - 1)