예제 #1
0
 def download(self):
     """Download every table-of-contents XML file into the ``in`` directory.

     Each URL below is fetched by running ``wget`` through ``sh``.  The
     n-th URL (counting from 1) is written to ``in/<n>.xml``.
     """
     toc_urls = (
         'http://www.inghist.nl/retroapp/service_archives/01_01/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/01_02/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/01_03/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/01_04/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/01_05/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/01_06/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/01_07/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/01_08/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/01_supplement/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/01_table/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/02_01/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/02_02/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/02_03/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/02_04/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/02_05/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/03_01/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/03_02/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/03_03/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/04_01/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/04_02/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/04_03/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/04_04/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/04_supplement/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/05_01/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/05_02/toc_xml_source?filename=*AK.xml',
         'http://www.inghist.nl/retroapp/service_archives/05_03/toc_xml_source?filename=*AK.xml',
     )
     # Output files are numbered from 1, in the order the URLs are listed.
     for position, toc_url in enumerate(toc_urls):
         sh("wget %s -O in/%s.xml" % (toc_url, position + 1))
    def create_filled_repository_from_scratch(self, sources=2):
        """Build the test repository from the XML fixtures and dump it to SQL.

        Creates the database tables, imports the ``knaw`` fixture source (and
        the ``knaw2`` one when ``sources`` is greater than 1), updates the
        category table, registers the ``bioport`` source with quality 10000,
        and finally writes a ``mysqldump`` of the ``bioport_test`` database to
        ``SQLDUMP_FILENAME``.

        Sets ``self._is_filled`` to True and returns ``self.repo``.
        """
        def import_fixture(source_id, list_path):
            # Register a source that points at a local list.xml and download
            # its biographies into the repository.
            list_url = 'file://%s' % os.path.abspath(os.path.join(THIS_DIR, list_path))
            fixture = Source(id=source_id, url=list_url, description='test')
            self.repo.add_source(fixture)
            self.repo.download_biographies(fixture)

        # create a repo filled with some data
        self.repo.db.metadata.create_all()
        import_fixture(u'knaw', 'data/knaw/list.xml')
        if sources > 1:
            import_fixture(u'knaw2', 'data/knaw2/list.xml')
        self.repo.db._update_category_table()

        # also add Bioport source
        bioport_source = Source('bioport', repository=self.repo)
        self.repo.add_source(bioport_source)
        bioport_source.set_quality(10000)

        # Credentials for mysqldump are taken from the DSN.
        parsed_dsn = sqlalchemy.engine.url._parse_rfc1738_args(DSN)
        user = parsed_dsn.username or ""
        password = parsed_dsn.password or ""
        if password:
            dump_command = 'mysqldump -u %s -p%s bioport_test > %s' % (user, password, SQLDUMP_FILENAME)
        else:
            # No password configured: leave the -p option out entirely.
            dump_command = 'mysqldump -u %s bioport_test > %s' % (user, SQLDUMP_FILENAME)
        sh(dump_command)

        self._is_filled = True
        return self.repo
    def create_filled_repository(self, sources=None):
        """Return the repository after loading the example data from the SQL dump.

        When ``self._fill_repository`` is false the repository is returned
        untouched.  Otherwise the dump stored in ``SQLDUMP_FILENAME`` is
        loaded into the ``bioport_test`` MySQL database with the ``mysql``
        command line client, using the user name and password found in
        ``DSN``, and ``self._is_filled`` is set to True.

        ``sources`` is accepted but not used by this method.

        Returns ``self.repo``.
        """
        if not self._fill_repository:
            return self.repo
        # Read the dump inside a ``with`` block so the file handle is closed
        # immediately instead of being left open until garbage collection.
        with open(SQLDUMP_FILENAME) as dump_file:
            sql_string = dump_file.read().decode('latin1')
        import bioport_repository.tests
        testdir = os.path.dirname(bioport_repository.tests.__file__)
        # NOTE(review): sql_string is not used after this substitution; the
        # mysql client below reads SQLDUMP_FILENAME directly, so the
        # placeholder replacement never reaches the database. Confirm whether
        # that is intended.
        sql_string = sql_string.replace('{{{test_data_dir}}}', testdir)

        # Credentials for the mysql client are taken from the DSN.
        dsn = sqlalchemy.engine.url._parse_rfc1738_args(DSN)
        username = dsn.username or ""
        passwd = dsn.password or ""

        # we sometimes get table locks if we don't release the session before
        # the tables are replaced
        self.repo.db.Session.remove()

        if passwd:
            sh('mysql -u %s -p%s bioport_test -e "source %s"' % (username, passwd, SQLDUMP_FILENAME))
        else:
            # No password configured: leave the -p option out entirely.
            sh('mysql -u %s bioport_test -e "source %s"' % (username, SQLDUMP_FILENAME))
        self._is_filled = True
        return self.repo
예제 #4
0
def parse_list(url):
    """Return the list of biodes documents that ``url`` refers to.

    Two kinds of input are supported:

    * a URL ending in ``tar.gz``: the archive is fetched (``http...`` via
      ``wget``, ``file://...`` via a local copy) into the current working
      directory, unpacked into a fresh temporary directory, and the full
      paths of the ``.xml`` files found at the top level of that directory
      are returned.  The temporary directory is removed at interpreter exit.
    * anything else: the URL is parsed as XML and the ``href`` attribute of
      every ``a`` element is returned.

    Raises ``ValueError`` for an archive URL that starts with neither
    ``http`` nor ``file://``.
    """
    #XXX USE biodes_list.BiodesList instead
    if url.endswith('tar.gz'):
        # we expect an archive containing biodes XML files
        def cleanup(tempdir):
            logging.info("Removing tempdir used for sources import %s", tempdir)
            if os.path.isdir(tempdir):
                shutil.rmtree(tempdir)

        archive = os.path.basename(url)
        tempdir = tempfile.mkdtemp(prefix="bioport_")
        atexit.register(cleanup, tempdir)

        # XXX - specifiy user and password in the url -argument
        if url.startswith('http'):
            # Imported here because only the wget download needs it.
            from gerbrandyutils import sh
            # NOTE(review): credentials are hard-coded in the source; they
            # should come from configuration or from the url instead.
            sh("wget %s --user=%s --password=%s" % (url, 'giampaolo', 'N@p0li'))
        elif url.startswith('file://'):
            # Strip only the leading scheme; a later 'file://' inside the
            # path must be left alone.
            _file = url[len('file://'):]
            shutil.copy(_file, '.')
        else:
            raise ValueError("don't know what to do with url %s" % url)
        try:
            tar = tarfile.open(archive)
            try:
                # NOTE(review): extractall() trusts the member paths stored
                # in the archive; only use this with trusted archives.
                tar.extractall(tempdir)
            finally:
                # Close the archive even when extraction fails.
                tar.close()
        finally:
            # move the archive to temp dir so that it gets deleted later
            shutil.move(archive, tempdir)

        return [
            os.path.join(tempdir, name)
            for name in os.listdir(tempdir)
            if name.endswith('.xml')
        ]
    else:
        # we expect an XML file
        parser = etree.XMLParser(no_network=False)
        root = etree.parse(url, parser)
        return [node.get('href') for node in root.xpath('//a')]
예제 #5
0
 def test_sh(self):
     """``sh`` returns truthy output for ``ls`` and raises RuntimeError for an unknown command."""
     listing = sh("ls")
     self.assertTrue(listing)
     with self.assertRaises(RuntimeError):
         sh('badcmd')
예제 #6
0
파일: db2xml.py 프로젝트: HoekR/namenindex
def upload_results():
    """Commit the contents of ``this_dir`` to Subversion with an empty log message.

    The shell command is printed before it is run through ``sh``.
    """
    # Chain with "&&" rather than ";" so that "svn ci" is skipped when the
    # "cd" fails, instead of committing whatever directory we happen to be in.
    cmd = 'cd %s && svn ci . -m ""' % this_dir
    # The parenthesised form prints the same text under Python 2 and also
    # parses under Python 3.
    print(cmd)
    sh(cmd)