def download_biographies(id):
    """Download every biography listed by a source and add it to ``_repo``.

    The source's ``url`` is parsed with ``biodes.parse_list`` into a list of
    biography locations.  Entries that are not http(s) URLs are treated as
    file-system paths; relative paths are resolved against the directory of
    the source's ``url``.

    arguments:
        id: the id of a source known to ``_repo`` (the name shadows the
            builtin, but is kept because it is part of the signature).

    returns:
        a ``(total, skipped)`` tuple: the number of entries in the list and
        the number of entries that could not be read or added.

    raises:
        ValueError: if no source with the given id exists.
    """
    if id not in get_ids():
        raise ValueError("No source with id '%s' was found" % id)
    for src in _repo.get_sources():
        if src.id == id:
            break
    else:
        # get_ids() and get_sources() disagree: fail loudly instead of
        # silently continuing with whatever source was iterated last.
        raise ValueError("No source with id '%s' was found" % id)

    ls = biodes.parse_list(src.url)
    total = len(ls)
    skipped = 0
    for index, biourl in enumerate(ls):
        # https URLs must not go through normpath either: it would collapse
        # the "//" after the scheme and break the URL.
        if not biourl.startswith(("http:", "https:")):
            # we're dealing with a fs path
            biourl = os.path.normpath(biourl)
            if not os.path.isabs(biourl):
                biourl = os.path.join(os.path.dirname(src.url), biourl)

        bio = Biography(source_id=src.id, repository=src.repository)
        try:
            bio.from_url(biourl)
            print("%s/%s %s" % (index + 1, total, bio.get_names()))
        except Exception as err:
            skipped += 1
            print(err)
            continue

        try:
            _repo.add_biography(bio)
        except Exception as err:
            # Report and move on, like a failed download above, instead of
            # dropping into an interactive debugger in the middle of a run.
            skipped += 1
            print("could not add %s: %s" % (biourl, err))

    return total, skipped
    def download_biographies(self, source, limit=None):
        """Download all biographies from source.url and add them to the repository.
        Mark any biographies that we did not find (anymore), by removing the source_url property.
        Return the number of total and skipped biographies.

        NOTE(review): the statements visible here only validate the source,
        attach this repository to it when it has none, and parse the list of
        biography links found at source.url; no value is returned by them.
        The summary above (counts) and the "returns" section below (a list
        of instances) are the documented intent and disagree with each
        other -- TODO confirm which one callers rely on.

        arguments:
            source: a Source instance; its ``url`` must be non-empty
            limit: when truthy, the parsed list is cut down to ``ls[:limit]``;
                None (the default) or 0 keeps the whole list

        returns:
             a list of biography instances

        raises:
            AssertionError: if source.url is empty
            BioPortException: if the data at source.url is not valid XML
        """

        # at the URL given we find a list of links to biodes files
        if not source.url:
            # raised explicitly (rather than with `assert`) so the check is
            # not stripped when Python runs with -O; the exception type and
            # message are the same as before
            raise AssertionError('No URL was defined with the source "%s"' % source.id)

        # let logging do the interpolation; the emitted text is unchanged
        logging.info('downloading data at %s', source.url)
        logging.info('parsing source url')

        # TODO: perhaps it would be better to check on Source.__init__ if repository argument is given
        if not source.repository:
            source.repository = self
        try:
            ls = biodes.parse_list(source.url)
        except etree.XMLSyntaxError as error:  # @UndefinedVariable
            raise BioPortException('Error parsing data at %s -- check if this is valid XML\n%s' % (source.url, error))
        if limit:
            ls = ls[:limit]
 def test_parse_list(self):
     # parse the list.xml file that lives in this_dir ...
     list_path = os.path.join(this_dir, "list.xml")
     entries = parse_list(list_path)
     # ... and check that exactly two entries come back
     self.assertEqual(len(entries), 2)