def download(self):
    """Download the table-of-contents XML of every listed archive.

    Each archive is fetched with ``wget`` (run through ``sh``) and saved
    as ``in/<index>.xml``, where ``<index>`` is the 1-based position of
    the archive in the list below.
    """
    url_template = ('http://www.inghist.nl/retroapp/service_archives/'
                    '%s/toc_xml_source?filename=*AK.xml')
    # Order matters: the position in this tuple determines the name of the
    # output file.
    archives = (
        '01_01', '01_02', '01_03', '01_04',
        '01_05', '01_06', '01_07', '01_08',
        '01_supplement', '01_table',
        '02_01', '02_02', '02_03', '02_04', '02_05',
        '03_01', '03_02', '03_03',
        '04_01', '04_02', '04_03', '04_04',
        '04_supplement',
        '05_01', '05_02', '05_03',
    )
    for index, archive in enumerate(archives, 1):
        url = url_template % archive
        # The URL contains '?' and '*', which the shell treats as glob
        # characters; quote it so it always reaches wget unmodified.
        sh("wget '%s' -O in/%s.xml" % (url, index))
def create_filled_repository_from_scratch(self, sources=2):
    """Fill the repository from the XML fixtures and dump it to SQL.

    Creates the database tables, imports the ``knaw`` fixture source (and
    ``knaw2`` as well when ``sources`` is greater than 1), adds the
    ``bioport`` source, and finally writes a ``mysqldump`` of the
    ``bioport_test`` database to ``SQLDUMP_FILENAME``.

    :param sources: number of fixture sources to import; any value above 1
        also imports ``knaw2``.
    :returns: ``self.repo``
    """
    # create a repo filled with some data
    self.repo.db.metadata.create_all()
    url = 'file://%s' % os.path.abspath(
        os.path.join(THIS_DIR, 'data/knaw/list.xml'))
    source = Source(id=u'knaw', url=url, description='test')
    self.repo.add_source(source)
    self.repo.download_biographies(source)

    if sources > 1:
        url = 'file://%s' % os.path.abspath(
            os.path.join(THIS_DIR, 'data/knaw2/list.xml'))
        source = Source(id=u'knaw2', url=url, description='test')
        self.repo.add_source(source)
        self.repo.download_biographies(source)

    self.repo.db._update_category_table()

    # also add Bioport source
    src = Source('bioport', repository=self.repo)
    self.repo.add_source(src)
    src.set_quality(10000)

    # Use the public URL parser instead of the private
    # ``_parse_rfc1738_args`` helper, which is not part of SQLAlchemy's API.
    dsn = sqlalchemy.engine.url.make_url(DSN)
    username = dsn.username or ""
    passwd = dsn.password or ""
    if passwd:
        sh('mysqldump -u %s -p%s bioport_test > %s'
           % (username, passwd, SQLDUMP_FILENAME))
    else:
        sh('mysqldump -u %s bioport_test > %s' % (username, SQLDUMP_FILENAME))
    self._is_filled = True
    return self.repo
def create_filled_repository(self, sources=None):
    """Create a repository filled with example data.

    Loads the SQL dump at ``SQLDUMP_FILENAME`` into the ``bioport_test``
    database using the ``mysql`` command line client. Does nothing but
    return the repository when ``self._fill_repository`` is false.

    :param sources: accepted but not used by this method.
    :returns: ``self.repo``
    """
    if not self._fill_repository:
        return self.repo

    # NOTE(review): the substituted ``sql_string`` built here is never used;
    # the ``mysql`` call below sources the dump file as it is on disk, so the
    # ``{{{test_data_dir}}}`` placeholder is not actually replaced in the
    # database. Kept as-is pending confirmation of the intended behaviour.
    with open(SQLDUMP_FILENAME) as dump:  # close the handle deterministically
        sql_string = dump.read().decode('latin1')
    import bioport_repository.tests
    testdir = os.path.dirname(bioport_repository.tests.__file__)
    sql_string = sql_string.replace('{{{test_data_dir}}}', testdir)

    # Use the public URL parser instead of the private
    # ``_parse_rfc1738_args`` helper, which is not part of SQLAlchemy's API.
    dsn = sqlalchemy.engine.url.make_url(DSN)
    username = dsn.username or ""
    passwd = dsn.password or ""

    # we sometimes get table locks if we don't do this before calling
    # metadata.drop_all()
    self.repo.db.Session.remove()

    if passwd:
        sh('mysql -u %s -p%s bioport_test -e "source %s"'
           % (username, passwd, SQLDUMP_FILENAME))
    else:
        sh('mysql -u %s bioport_test -e "source %s"'
           % (username, SQLDUMP_FILENAME))
    self._is_filled = True
    return self.repo
def parse_list(url):
    """Return the locations of the biodes documents listed at ``url``.

    Two kinds of input are handled:

    * A URL ending in ``tar.gz`` is treated as an archive of biodes XML
      files. It is fetched into the current working directory (``wget``
      for ``http`` URLs, a plain copy for ``file://`` URLs), extracted
      into a fresh temporary directory, and the full paths of the
      ``.xml`` entries directly inside that directory are returned. The
      temporary directory, with the archive moved into it, is removed at
      interpreter exit.
    * Any other URL is parsed as XML and the ``href`` attribute of every
      ``<a>`` element is returned.

    :param url: location of the archive or of the XML list document.
    :returns: list of paths or URLs of biodes documents.
    :raises ValueError: if a ``tar.gz`` URL is neither ``http`` nor
        ``file://``.
    """
    # XXX USE biodes_list.BiodesList instead
    if not url.endswith('tar.gz'):
        # we expect an XML file
        parser = etree.XMLParser(no_network=False)
        root = etree.parse(url, parser)
        return [node.get('href') for node in root.xpath('//a')]

    # we expect an archive containing biodes XML files
    def cleanup(tempdir):
        logging.info("Removing tempdir used for sources import %s", tempdir)
        if os.path.isdir(tempdir):
            shutil.rmtree(tempdir)

    archive = os.path.basename(url)
    tempdir = tempfile.mkdtemp(prefix="bioport_")
    atexit.register(cleanup, tempdir)

    if url.startswith('http'):
        # Only this branch needs the shell helper.
        from gerbrandyutils import sh
        # XXX - specifiy user and password in the url -argument
        # NOTE(review): credentials are hard-coded in source; they should
        # come from configuration instead.
        # The URL is quoted so shell metacharacters in it reach wget intact.
        sh("wget '%s' --user=%s --password=%s" % (url, 'giampaolo', 'N@p0li'))
    elif url.startswith('file://'):
        # Strip only the scheme prefix, not later occurrences in the path.
        shutil.copy(url[len('file://'):], '.')
    else:
        raise ValueError("don't know what to do with url %s" % url)

    try:
        tar = tarfile.open(archive)
        try:
            # NOTE(review): extractall() trusts the member names in the
            # archive; only use this with archives from a trusted source.
            tar.extractall(tempdir)
        finally:
            # Close the archive even when extraction fails.
            tar.close()
    finally:
        # move the archive to temp dir so that it gets deleted later
        shutil.move(archive, tempdir)

    return [
        os.path.join(tempdir, name)
        for name in os.listdir(tempdir)
        if name.endswith('.xml')
    ]
def test_sh(self):
    """``sh`` yields output for ``ls`` and raises for an unknown command."""
    listing = sh("ls")
    self.assertTrue(listing)
    with self.assertRaises(RuntimeError):
        sh('badcmd')
def upload_results():
    """Commit the contents of ``this_dir`` to Subversion.

    The command is printed before it is run through ``sh``; the commit
    uses an empty log message.
    """
    # '&&' rather than ';': if the cd fails, the commit must not run in
    # whatever directory happens to be current.
    cmd = 'cd %s && svn ci . -m ""' % this_dir
    print(cmd)
    sh(cmd)