def write_if_allowed(filename: str,
                     content: str,
                     overwrite: bool = False,
                     mock: bool = False) -> None:
    """
    Writes the contents to a file, if permitted.

    Args:
        filename: filename to write
        content: contents to write
        overwrite: permit overwrites?
        mock: pretend to write, but don't

    Raises:
        RuntimeError: if file exists but overwriting not permitted
    """
    # Refuse to clobber an existing file unless explicitly permitted.
    if not overwrite and exists(filename):
        fail(f"File exists, not overwriting: {filename!r}")

    # Ensure the parent directory exists (not in mock mode, which must
    # have no filesystem side effects).
    if not mock:
        mkdir_p(dirname(filename))

    # Write (or pretend to).
    log.info("Writing to {!r}", filename)
    if mock:
        log.warning("Skipping writes as in mock mode")
        return
    with open(filename, "wt") as f:
        f.write(content)
def test_anon(uniquepatients: bool,
              limit: int,
              from_src: bool,
              rawdir: str,
              anondir: str,
              scrubfile: str,
              resultsfile: str,
              dsttable: str,
              dstfield: str) -> None:
    """
    Fetch raw and anonymised documents and store them in files for comparison,
    along with some summary information.

    Args:
        uniquepatients: fetch one document each for a lot of patients (rather
            than a lot of documents, potentially from the same patient or a
            small number)?
        limit: maximum number of documents to retrieve
        from_src: retrieve IDs from the source database, not the destination
            database?
        rawdir: directory to store raw documents in
        anondir: directory to store anonymised documents in
        scrubfile: filename to store scrubber information in (as JSON)
        resultsfile: filename to store CSV summaries in
        dsttable: name of the destination table
        dstfield: name of the destination table's text field of interest
    """
    fieldinfo = FieldInfo(dsttable, dstfield)
    docids = get_docids(fieldinfo=fieldinfo,
                        uniquepatients=uniquepatients,
                        limit=limit,
                        from_src=from_src)
    # Ensure both output directories exist.
    for output_directory in (rawdir, anondir):
        mkdir_p(output_directory)
    scrubdict = {}  # type: Dict[int, Dict[str, Any]]
    pidset = set()  # type: Set[int]
    with open(resultsfile, 'w') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter='\t')
        for doc_index, docid in enumerate(docids):
            # Only the very first document gets first=True (e.g. to write a
            # header row).
            # noinspection PyTypeChecker
            pid = process_doc(docid=docid,
                              rawdir=rawdir,
                              anondir=anondir,
                              fieldinfo=fieldinfo,
                              csvwriter=csvwriter,
                              first=doc_index == 0,
                              scrubdict=scrubdict)
            pidset.add(pid)
    # Save scrubber details as JSON.
    with open(scrubfile, 'w') as f:
        f.write(json.dumps(scrubdict, indent=4))
    log.info(f"Finished. See {resultsfile} for a summary.")
    log.info(f"Use meld to compare directories {rawdir} and {anondir}")
    log.info("To install meld on Debian/Ubuntu: sudo apt-get install meld")
    log.info(f"{len(docids)} documents, {len(pidset)} patients")
def download_if_not_exists(url: str,
                           filename: str,
                           skip_cert_verify: bool = True,
                           mkdir: bool = True) -> None:
    """
    Downloads a URL to a file, unless the file already exists.

    Args:
        url: URL to download from
        filename: destination filename
        skip_cert_verify: skip SSL certificate verification when downloading?
        mkdir: make the destination directory if it doesn't exist?
    """
    if os.path.isfile(filename):
        log.info("No need to download, already have: {}", filename)
        return
    if mkdir:
        # Fix: previously used os.path.split() and discarded the unused
        # basename; dirname() expresses the intent directly.
        directory = os.path.dirname(os.path.abspath(filename))
        mkdir_p(directory)
    download(url=url,
             filename=filename,
             skip_cert_verify=skip_cert_verify)
def export_file(self,
                filename: str,
                text: str = None,
                binary: bytes = None,
                text_encoding: str = UTF8) -> bool:
    """
    Exports the file.

    Args:
        filename: filename to write to
        text: text contents (specify this XOR ``binary``)
        binary: binary contents (specify this XOR ``text``)
        text_encoding: encoding to use when writing text

    Returns:
        bool: was it exported?
    """
    # Fix: return annotation was "-> False", which is a value, not a type;
    # the function returns a bool, so it must be "-> bool".
    # NOTE(review): this is a truthiness check, so an empty string/bytes
    # payload also trips the assertion; that appears intentional (there
    # must be something to write).
    assert bool(text) != bool(binary), "Specify text XOR binary"
    exported_task = self.exported_task
    filename = os.path.abspath(filename)
    directory = os.path.dirname(filename)
    recipient = exported_task.recipient
    # Respect the recipient's overwrite policy.
    if not recipient.file_overwrite_files and os.path.isfile(filename):
        self.abort("File already exists: {!r}".format(filename))
        return False
    # Optionally create the destination directory.
    if recipient.file_make_directory:
        try:
            mkdir_p(directory)
        except Exception as e:
            self.abort("Couldn't make directory {!r}: {}".format(
                directory, e))
            return False
    try:
        log.debug("Writing to {!r}", filename)
        if text:
            with open(filename, mode="w", encoding=text_encoding) as f:
                f.write(text)
        else:
            with open(filename, mode="wb") as f:
                f.write(binary)
    except Exception as e:
        self.abort("Failed to open or write file {!r}: {}".format(
            filename, e))
        return False
    self.note_exported_file(filename)
    return True
def _start(self) -> None:
    """
    Launch the external process. We will save and retrieve data via files,
    and send signals ("data ready", "results ready) via stdin/stout.
    """
    # Idempotent; also a no-op in debug mode (no real process).
    if self._started or self._debug_mode:
        return
    args = self._progargs

    # Nasty MedEx hacks: MedEx wants to be run from its working directory,
    # with "sents" and "log" subdirectories present.
    cwd = os.getcwd()
    log.info(f"for MedEx's benefit, changing to directory: "
             f"{self._workingdir.name}")
    os.chdir(self._workingdir.name)
    sentsdir = os.path.join(self._workingdir.name, "sents")
    logdir = os.path.join(self._workingdir.name, "log")
    log.info(f"making temporary sentences directory: {sentsdir}")
    mkdir_p(sentsdir)
    log.info(f"making temporary log directory: {logdir}")
    mkdir_p(logdir)

    log.info(f"launching command: {args}")
    self._p = subprocess.Popen(args,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               # stderr=subprocess.PIPE,
                               shell=False,
                               bufsize=1)
    # ... don't ask for stderr to be piped if you don't want it; firstly,
    # there's a risk that if you don't consume it, something hangs, and
    # secondly if you don't consume it, you see it on the console, which is
    # helpful.
    self._started = True
    log.info(f"returning to working directory {cwd}")
    os.chdir(cwd)
def __init__(self,
             nlpdef: NlpDefinition,
             cfgsection: str,
             commit: bool = False) -> None:
    """
    Set up the MedEx processor from the NLP definition/config.

    Args:
        nlpdef:
            a :class:`crate_anon.nlp_manager.nlp_definition.NlpDefinition`
        cfgsection:
            the name of a CRATE NLP config file section (from which we may
            choose to get extra config information)
        commit:
            force a COMMIT whenever we insert data? You should specify this
            in multiprocess mode, or you may get database deadlocks.
    """
    super().__init__(nlpdef=nlpdef,
                     cfgsection=cfgsection,
                     commit=commit,
                     name="MedEx")
    if nlpdef is None:
        # only None for debugging!
        # Debug mode: hard-coded defaults instead of config lookups.
        self._debug_mode = True
        self._tablename = self.classname().lower()
        self._max_external_prog_uses = 1
        self._progenvsection = ""
        self._env = {}  # type: Dict[str, str]
        progargs = ""
    else:
        # Normal mode: read everything from the NLP config section.
        self._debug_mode = False
        self._tablename = nlpdef.opt_str(
            self._sectionname,
            ProcessorConfigKeys.DESTTABLE,
            required=True)
        self._max_external_prog_uses = nlpdef.opt_int(
            self._sectionname,
            ProcessorConfigKeys.MAX_EXTERNAL_PROG_USES,
            default=0)
        self._progenvsection = nlpdef.opt_str(
            self._sectionname,
            ProcessorConfigKeys.PROGENVSECTION)
        # Environment for the child process: either from a dedicated config
        # section (layered over os.environ) or a plain copy of os.environ.
        if self._progenvsection:
            self._env = nlpdef.get_env_dict(
                full_sectionname(NlpConfigPrefixes.ENV,
                                 self._progenvsection),
                os.environ)
        else:
            self._env = os.environ.copy()
        self._env["NLPLOGTAG"] = nlpdef.get_logtag() or '.'
        # ... because passing a "-lt" switch with no parameter will make
        # CrateGatePipeline.java complain and stop
        progargs = nlpdef.opt_str(
            self._sectionname,
            ProcessorConfigKeys.PROGARGS,
            required=True)
    # Input/output/working directories: real temporary directories, or
    # fixed directories under the user's home.
    if USE_TEMP_DIRS:
        self._inputdir = tempfile.TemporaryDirectory()
        self._outputdir = tempfile.TemporaryDirectory()
        self._workingdir = tempfile.TemporaryDirectory()
        # ... these are autodeleted when the object goes out of scope; see
        # https://docs.python.org/3/library/tempfile.html
        # ... which manages it using weakref.finalize
    else:
        homedir = os.path.expanduser("~")
        self._inputdir = PseudoTempDir(
            os.path.join(homedir, "medextemp", "input"))
        mkdir_p(self._inputdir.name)
        self._outputdir = PseudoTempDir(
            os.path.join(homedir, "medextemp", "output"))
        mkdir_p(self._outputdir.name)
        self._workingdir = PseudoTempDir(
            os.path.join(homedir, "medextemp", "working"))
        mkdir_p(self._workingdir.name)
    # Substitute environment variables into the command template, then
    # tokenize it shell-style and append our own I/O and signalling options.
    formatted_progargs = progargs.format(**self._env)
    self._progargs = shlex.split(formatted_progargs)
    self._progargs.extend([
        "-data_ready_signal", MEDEX_DATA_READY_SIGNAL,
        "-results_ready_signal", MEDEX_RESULTS_READY_SIGNAL,
        "-i", self._inputdir.name,
        "-o", self._outputdir.name,
    ])
    self._n_uses = 0  # number of times the external program has been used
    self._pipe_encoding = 'utf8'
    self._file_encoding = 'utf8'
    self._p = None  # the subprocess
    self._started = False  # has the subprocess been launched?
def build_package() -> None:
    """
    Builds the package: creates a Python source distribution, assembles a
    Debian package from it (man pages, launch scripts, maintainer scripts,
    Lintian override, copyright), then converts the ``.deb`` to an RPM via
    ``alien`` and rewrites the RPM's dependencies with ``rpmrebuild``.
    """
    log.info("Building Python package")
    setup_py = join(SRCSERVERDIR, "setup.py")
    sdist_basefilename = "camcops_server-{}.tar.gz".format(MAINVERSION)
    src_sdist_file = join(SRCSERVERDIR, "dist", sdist_basefilename)
    wrk_sdist_file = join(WRKBASEDIR, sdist_basefilename)
    # Remove any stale sdist first; ignore "file not found".
    try:
        log.info("Deleting old {} if it exists", src_sdist_file)
        os.remove(src_sdist_file)
    except OSError:
        pass
    os.chdir(SETUP_PY_DIR)  # or setup.py looks in wrong places?
    cmdargs = ["python", setup_py, "sdist"]
    call(cmdargs)
    # Strip the gzip timestamp so builds are reproducible.
    remove_gzip_timestamp(src_sdist_file)

    log.info("Making directories")
    mkdir_p(DEBDIR)
    mkdir_p(DEBOVERRIDEDIR)
    mkdir_p(PACKAGEDIR)
    mkdir_p(RPMTOPDIR)
    mkdir_p(WRKCONFIGDIR)
    mkdir_p(WRKCONSOLEFILEDIR)
    mkdir_p(WRKDIR)
    mkdir_p(WRKDOCDIR)
    mkdir_p(WRKMANDIR)
    mkdir_p(WRKMPLCONFIGDIR)
    mkdir_p(WRKBASEDIR)
    mkdir_p(WRKTOOLDIR)
    # Standard rpmbuild directory tree:
    for d in "BUILD,BUILDROOT,RPMS,RPMS/noarch,SOURCES,SPECS,SRPMS".split(","):
        mkdir_p(join(RPMTOPDIR, d))

    log.info("Copying files")
    write_gzipped_text(join(WRKDOCDIR, "changelog.Debian"), get_changelog())
    copyglob(join(SRCTOOLDIR, VENVSCRIPT), WRKTOOLDIR)
    shutil.copyfile(src_sdist_file, wrk_sdist_file)

    log.info("Creating man page for camcops. "
             "Will be installed as " + DSTMANFILE)
    write_gzipped_text(WRKMANFILE_BASE, get_man_page_camcops_server())

    log.info("Creating man page for camcops_server_meta. "
             "Will be installed as " + DSTMETAMANFILE)
    write_gzipped_text(WRKMETAMANFILE_BASE,
                       get_man_page_camcops_server_meta())

    log.info("Creating links to documentation. "
             "Will be installed as " + DSTREADME)
    write_text(WRKREADME, get_readme())

    log.info("Creating camcops_server launch script. "
             "Will be installed as " + DSTCONSOLEFILE)
    write_text(WRKCONSOLEFILE, get_camcops_server_launcher())

    log.info("Creating camcops_server_meta launch script. "
             "Will be installed as " + DSTMETACONSOLEFILE)
    write_text(WRKMETACONSOLEFILE, get_camcops_server_meta_launcher())

    log.info("Creating Debian control file")
    write_text(join(DEBDIR, "control"), get_debian_control())

    # Debian maintainer scripts (preinst/postinst/prerm/postrm):
    log.info("Creating preinst file. Will be installed as " +
             join(DSTDPKGDIR, PACKAGE_DEB_NAME + ".preinst"))
    write_text(join(DEBDIR, "preinst"), get_preinst())

    log.info("Creating postinst file. Will be installed as " +
             join(DSTDPKGDIR, PACKAGE_DEB_NAME + ".postinst"))
    write_text(join(DEBDIR, "postinst"), get_postinst(sdist_basefilename))

    log.info("Creating prerm file. Will be installed as " +
             join(DSTDPKGDIR, PACKAGE_DEB_NAME + ".prerm"))
    write_text(join(DEBDIR, "prerm"), get_prerm())

    log.info("Creating postrm file. Will be installed as " +
             join(DSTDPKGDIR, PACKAGE_DEB_NAME + ".postrm"))
    write_text(join(DEBDIR, "postrm"), get_postrm())

    log.info("Creating Lintian override file")
    write_text(join(DEBOVERRIDEDIR, PACKAGE_DEB_NAME), get_override())

    log.info("Creating copyright file. Will be installed as " +
             join(DSTDOCDIR, "copyright"))
    write_text(join(WRKDOCDIR, "copyright"), get_copyright())

    log.info("Setting ownership and permissions")
    call(["find", WRKDIR, "-type", "d", "-exec", "chmod", "755", "{}", ";"])
    # ... make directories executable: must do that first, or all the
    # subsequent recursions fail
    call(["find", WRKDIR, "-type", "f", "-exec", "chmod", "644", "{}", ";"])
    call([
        "chmod", "a+x",
        WRKCONSOLEFILE,
        WRKMETACONSOLEFILE,
        join(DEBDIR, "prerm"),
        join(DEBDIR, "postrm"),
        join(DEBDIR, "preinst"),
        join(DEBDIR, "postinst"),
    ])
    call(
        ["find", WRKDIR, "-iname", "*.py", "-exec", "chmod", "a+x", "{}",
         ";"])
    call(
        ["find", WRKDIR, "-iname", "*.pl", "-exec", "chmod", "a+x", "{}",
         ";"])

    log.info("Removing junk")
    call(["find", WRKDIR, "-name", "*.svn", "-exec", "rm", "-rf", "{}", ";"])
    call(["find", WRKDIR, "-name", ".git", "-exec", "rm", "-rf", "{}", ";"])
    call([
        "find", WRKDOCDIR, "-name", "LICENSE", "-exec", "rm", "-rf", "{}",
        ";",
    ])

    log.info("Building package")
    call(["fakeroot", "dpkg-deb", "--build", WRKDIR, PACKAGENAME])
    # ... "fakeroot" prefix makes all files installed as root:root

    log.info("Checking with Lintian")
    # fail-in-warnings has gone in 2.62.0
    # It isn't clear if lintian now exits with 0 on warnings (the previous
    # default). Future versions seems to have a more flexible --fail-on option
    call(["lintian", PACKAGENAME])

    log.info("Converting to RPM")
    call(
        ["fakeroot", "alien", "--to-rpm", "--scripts", PACKAGENAME],
        cwd=PACKAGEDIR,
    )
    # see "man alien"
    # NOTE: needs to be run as root for correct final permissions
    expected_main_rpm_name = "{PACKAGE}-{MAINVERSION}-2.noarch.rpm".format(
        PACKAGE=PACKAGE_DEB_NAME, MAINVERSION=MAINVERSION)
    full_rpm_path = join(PACKAGEDIR, expected_main_rpm_name)

    # This chown is causing problems with GitHub actions. The user is 'runner'
    # and there is no group called 'runner'. Is it needed anyway? Seems to run
    # OK locally without this line.
    # myuser = getpass.getuser()
    # shutil.chown(full_rpm_path, myuser, myuser)

    log.info("Changing dependencies within RPM")
    # Alien does not successfully translate the dependencies, and anyway the
    # names for packages are different on CentOS. A dummy prerequisite package
    # works (below) but is inelegant.
    # The rpmbuild commands are filters (text in via stdin, text out to
    # stdout), so replacement just needs the echo command.
    depends_rpm = get_lines_without_comments(RPM_REQ_FILE)
    echoparam = repr("Requires: {}".format(" ".join(depends_rpm)))
    call([
        "rpmrebuild",
        "--define", "_topdir " + RPMTOPDIR,
        "--package",
        "--change-spec-requires=/bin/echo {}".format(echoparam),
        full_rpm_path,
    ])
    # ... add "--edit-whole" as the last option before the RPM name to see
    # what you're getting
    # ... define topdir, or it builds in ~/rpmbuild/...
    # ... --package, or it looks for an installed RPM rather than a package
    # file
    # ... if echo parameter has brackets in, ensure it's quoted
    shutil.move(
        join(RPMTOPDIR, "RPMS", "noarch", expected_main_rpm_name),
        join(PACKAGEDIR, expected_main_rpm_name),
    )
    # ... will overwrite its predecessor

    log.info("Deleting temporary workspace")
    shutil.rmtree(TMPDIR, ignore_errors=True)  # CAUTION!

    # Done
    log.info("=" * 79)
    log.info("Debian package should be: " + PACKAGENAME)
    log.info("RPM should be: " + full_rpm_path)
if not _archive_static_dir: missing.append(SettingsKeys.ARCHIVE_STATIC_DIR) if not _archive_template_cache_dir: missing.append(SettingsKeys.ARCHIVE_TEMPLATE_CACHE_DIR) if not _archive_template_dir: missing.append(SettingsKeys.ARCHIVE_TEMPLATE_DIR) return HttpResponseBadRequest( f"Archive not configured. Administrator has not set: {missing!r}") # ============================================================================= # Set up caches and Mako lookups. # ============================================================================= if ARCHIVE_IS_CONFIGURED: mkdir_p(_archive_template_cache_dir) archive_mako_lookup = TemplateLookup( directories=[_archive_template_dir], module_directory=_archive_template_cache_dir, strict_undefined=True, # raise error immediately upon typos! ) else: archive_mako_lookup = None # ============================================================================= # Auditing # ============================================================================= def audit_archive_template(request: HttpRequest, patient_id: str, query_string: str) -> None:
# Installed-file destinations for the Debian package.
DEST_SUPERVISOR_CONF_FILE = join(DEST_SUPERVISOR_CONF_DIR,
                                 f'{PACKAGE_FOR_DEB}.conf')
DEB_PACKAGE_FILE = join(PACKAGE_DIR,
                        f'{PACKAGE_FOR_DEB}_{DEBVERSION}_all.deb')
LOCAL_CONFIG_BASENAME = "crateweb_local_settings.py"
DEST_CRATEWEB_CONF_FILE = join(DEST_PACKAGE_CONF_DIR, LOCAL_CONFIG_BASENAME)
INSTRUCTIONS = join(DEST_ROOT, 'instructions.txt')
DEST_VENV_INSTALLER = join(DEST_ROOT, 'tools', 'install_virtualenv.py')
DEST_WKHTMLTOPDF_INSTALLER = join(DEST_ROOT, 'tools',
                                  'install_wkhtmltopdf.py')
DEST_CRATE_PIPFILE = join(DEST_ROOT, CRATE_PIPFILE)

# =============================================================================
# Make directories
# =============================================================================

print("Making directories")
# Work tree mirroring the final installed layout (via workpath), plus the
# Debian metadata directories.
mkdir_p(WORK_DIR)
mkdir_p(workpath(DEST_ROOT))
mkdir_p(workpath(DEST_PACKAGE_CONF_DIR))
mkdir_p(workpath(DEST_SUPERVISOR_CONF_DIR))
mkdir_p(workpath(DEST_DOC_DIR))
mkdir_p(DEB_DIR)
mkdir_p(DEB_OVERRIDE_DIR)

# =============================================================================
# Make Debian files
# =============================================================================

# -----------------------------------------------------------------------------
print("Creating preinst file. Will be installed as " +
      join(INFO_DEST_DPKG_DIR, PACKAGE_FOR_DEB + '.preinst'))
# -----------------------------------------------------------------------------
def untar_to_directory(tarfile: str,
                       directory: str,
                       verbose: bool = False,
                       gzipped: bool = False,
                       skip_if_dir_exists: bool = True,
                       run_func: RunFuncType = None,
                       chdir_via_python: bool = True,
                       tar_executable: str = None,
                       tar_supports_force_local: bool = None) -> None:
    """
    Unpacks a TAR file into a specified directory.

    Args:
        tarfile: filename of the ``.tar`` file
        directory: destination directory
        verbose: be verbose?
        gzipped: is the ``.tar`` also gzipped, e.g. a ``.tar.gz`` file?
        skip_if_dir_exists: don't do anything if the destination directory
            exists?
        run_func: function to use to call an external command (defaults to
            :func:`subprocess.check_call`)
        chdir_via_python: change directory via Python, not via ``tar``.
            Consider using this via Windows, because Cygwin ``tar`` v1.29
            falls over when given a Windows path for its ``-C`` (or
            ``--directory``) option.
        tar_executable: name of the ``tar`` executable (default is ``tar``)
        tar_supports_force_local: does tar support the ``--force-local``
            switch? If you pass ``None`` (the default), this is checked
            directly via ``tar --help``. Linux/GNU tar does; MacOS tar
            doesn't; Cygwin tar does; Windows 10 (build 17063+) tar doesn't.
    """
    if skip_if_dir_exists and os.path.isdir(directory):
        log.info("Skipping extraction of {} as directory {} exists",
                 tarfile, directory)
        return
    if run_func is None:
        # Fix: run_func defaulted to None and was then called
        # unconditionally, raising TypeError. Default to check_call.
        import subprocess
        run_func = subprocess.check_call
    tar = which_and_require(tar_executable or "tar")
    if tar_supports_force_local is None:
        tar_supports_force_local = tar_supports_force_local_switch(tar)
    log.info("Extracting {} -> {}", tarfile, directory)
    mkdir_p(directory)
    args = [tar, "-x"]  # -x: extract
    if verbose:
        args.append("-v")  # -v: verbose
    if gzipped:
        args.append("-z")  # -z: decompress using gzip
    if tar_supports_force_local:
        args.append("--force-local")  # allows filenames with colons in
    args.extend(["-f", tarfile])  # -f: filename follows
    if chdir_via_python:
        with pushd(directory):
            run_func(args)
    else:
        # chdir via tar
        args.extend(["-C", directory])  # -C: change to directory
        run_func(args)
def prepare_umls_for_bioyodie(cfg: UmlsBioyodieConversionConfig) -> None:
    """
    Prepare downloaded UMLS data for Bio-YODIE, according to the instructions
    at https://github.com/GateNLP/bio-yodie-resource-prep.

    Args:
        cfg: the conversion configuration (source ZIP, temporary directory,
            destination directory, tool locations).
    """
    # -------------------------------------------------------------------------
    # Parameter checks
    # -------------------------------------------------------------------------
    assert cfg.java_home
    assert cfg.gate_home
    # -------------------------------------------------------------------------
    # Establish the release (version)
    # -------------------------------------------------------------------------
    # There are two releases per year, e.g. 2017AA and 2017AB.
    release_regex = regex.compile(r"umls-(\d\d\d\dA[AB])-full.zip")
    umls_zip_basename = os.path.basename(cfg.umls_zip)
    try:
        release = release_regex.match(umls_zip_basename).group(1)
    except AttributeError:  # 'NoneType' object has no attribute 'group'
        release = None  # for type-checker only (below)
        die(f"Unable to work out UMLS release from filename: "
            f"{umls_zip_basename!r}")
    # -------------------------------------------------------------------------
    # Directory names
    # -------------------------------------------------------------------------
    umls_root_dir = join(cfg.tmp_dir, "umls_data_with_mmsys")
    umls_metadir = umls_root_dir
    umls_mmsys_home = umls_metadir
    # ... because the GUI installer wants "release.dat" (which is in the root
    # and config/2017AA directories of "mmsys.zip") to be in the same
    # directory as the Metathesaurus files. Do NOT put it in a "MMSYS"
    # subdirectory, despite
    # https://www.nlm.nih.gov/research/umls/implementation_resources/community/mmsys/BatchMRCXTBuilder.html
    umls_lib_dir = join(umls_mmsys_home, "lib")
    umls_plugins_dir = join(umls_mmsys_home, "plugins")
    umls_output_dir = join(cfg.tmp_dir, "umls_output")
    # ... Where we tell it to store data.
    # Log files and other output go here.
    bioyodie_repo_dir = join(cfg.tmp_dir, "bio-yodie-resource-prep")
    bioyodie_db_dir = join(bioyodie_repo_dir, "databases")
    bioyodie_scala_dir = join(bioyodie_repo_dir, "scala")
    bioyodie_tmpdata_dir = join(bioyodie_repo_dir, "tmpdata")
    bioyodie_umls_dir_containing_symlink = join(
        bioyodie_repo_dir, "srcs", "umls", "2015AB")  # hard-coded "2015AB"
    bioyodie_umls_input_dir = join(bioyodie_umls_dir_containing_symlink,
                                   "META")  # hard-coded "META"
    bioyodie_output_dir = join(bioyodie_repo_dir, "output")
    # -------------------------------------------------------------------------
    # Filenames
    # -------------------------------------------------------------------------
    scala_tgz = join(bioyodie_scala_dir, "scala.tgz")
    builder_script = join(bioyodie_repo_dir, "bin", "all.sh")
    mmsys_zip = join(umls_root_dir, "mmsys.zip")
    config_file = join(umls_metadir, "config.properties")
    boot_config = join(umls_mmsys_home, "etc", "subset.boot.properties")
    log4j_config = join(umls_mmsys_home, "etc", "rudolf.log4j.properties")  # new  # noqa
    system_java_home = cfg.java_home
    umls_java_home = join(umls_mmsys_home, "jre", "linux")  # it brings its own
    # -------------------------------------------------------------------------
    # Checks
    # -------------------------------------------------------------------------
    if os.path.exists(cfg.dest_dir):
        die(f"Directory already exists: {cfg.dest_dir}")
    system_unzip = require_external_tool("unzip")
    # These are required by the Bio-YODIE preprocessor:
    groovy_executable = cfg.groovy_executable or require_external_tool(
        "groovy")  # noqa
    require_external_tool("gzip")
    require_external_tool("zcat")
    # -------------------------------------------------------------------------
    # Environment variables
    # -------------------------------------------------------------------------
    # For UMLS (MMSYS ships its own JRE; point JAVA_HOME at it)
    umls_env = os.environ.copy()
    umls_env[EnvVar.JAVA_HOME] = umls_java_home
    # For Bio-YODIE preprocessor (system Java + GATE + groovy on the PATH)
    bioyodie_env = os.environ.copy()
    bioyodie_env[EnvVar.JAVA_HOME] = system_java_home
    bioyodie_env[EnvVar.GATE_HOME] = cfg.gate_home
    groovy_dir = os.path.dirname(os.path.abspath(groovy_executable))
    old_path = bioyodie_env.get(EnvVar.PATH, "")
    new_path_with_groovy = os.pathsep.join(
        x for x in [groovy_dir, old_path] if x)
    bioyodie_env[EnvVar.PATH] = new_path_with_groovy
    # -------------------------------------------------------------------------
    log.info("Cloning Bio-YODIE resource prep repository...")
    # -------------------------------------------------------------------------
    check_call_verbose(
        ["git", "clone", cfg.bioyodie_prep_repo_url, bioyodie_repo_dir])
    # -------------------------------------------------------------------------
    log.info("Making directories...")
    # -------------------------------------------------------------------------
    mkdir_p(umls_output_dir)
    mkdir_p(bioyodie_db_dir)
    # mkdir_p(bioyodie_scala_dir)  # already exists
    mkdir_p(bioyodie_tmpdata_dir)
    mkdir_p(bioyodie_umls_dir_containing_symlink)
    mkdir_p(bioyodie_output_dir)
    # -------------------------------------------------------------------------
    log.info("Fetching/building Scala for the BioYODIE processor...")
    # -------------------------------------------------------------------------
    # ... either before we set JAVA_HOME (to use the system Java) or after
    # we've unpacked MMSYS (which brings its own JRE), but not in between!
    download(cfg.scala_url, scala_tgz)
    with pushd(bioyodie_scala_dir):
        check_call_verbose(["tar", "-xzvf", scala_tgz])
        check_call_verbose(["ant"], env=bioyodie_env)
    # -------------------------------------------------------------------------
    log.info("Unzipping UMLS data...")
    # -------------------------------------------------------------------------
    check_call_verbose(["unzip", "-j", cfg.umls_zip, "-d", umls_root_dir])
    # -j: junk paths (extract "flat" into the specified directory)
    # -------------------------------------------------------------------------
    log.info("Unzipping UMLS MetamorphoSys (MMSYS) program (and its JRE)...")
    # -------------------------------------------------------------------------
    check_call_verbose(["unzip", mmsys_zip, "-d", umls_mmsys_home])
    # "To ensure proper functionality users must unzip mmsys.zip to the same
    # directory as the other downloaded files."
    # -- https://www.ncbi.nlm.nih.gov/books/NBK9683/
    # ... but see also example above.
    # -------------------------------------------------------------------------
    log.info("Running MetamorphoSys in batch mode...")
    # -------------------------------------------------------------------------
    # https://www.nlm.nih.gov/research/umls/implementation_resources/community/mmsys/BatchMetaMorphoSys.html  # noqa
    classpath = ":".join([
        umls_mmsys_home,
        umls_plugins_dir,  # RNC extra
        join(umls_lib_dir, "jpf-boot.jar"),
        join(umls_lib_dir, "jpf.jar"),  # RNC extra
        # You can use "dir/*" to mean "all JAR files in a directory":
        # https://en.wikipedia.org/wiki/Classpath
        join(umls_plugins_dir, "gov.nih.nlm.umls.meta", "lib", "*"),  # RNC extra  # noqa
        join(umls_plugins_dir, "gov.nih.nlm.umls.mmsys", "lib", "*"),  # RNC extra  # noqa
        join(umls_plugins_dir, "gov.nih.nlm.umls.mmsys.gui", "lib", "*"),  # RNC extra  # noqa
        join(umls_plugins_dir, "gov.nih.nlm.umls.mmsys.io", "lib", "*"),  # RNC extra  # noqa
        join(umls_plugins_dir, "gov.nih.nlm.umls.util", "lib", "*"),  # RNC extra  # noqa
    ])
    write_text(
        config_file,
        get_mmsys_configfile_text(metadir=umls_metadir,
                                  mmsys_home=umls_mmsys_home,
                                  release=release))
    write_text(log4j_config, LOG4J_PROPERTIES_TEXT)
    with pushd(umls_mmsys_home):
        log.warning(f"The next step is slow, and doesn't say much. "
                    f"It produces roughly 29 Gb at peak. "
                    f"Watch progress with: "
                    f"watch 'du -bc {cfg.tmp_dir} | tail -1'")
        check_call_verbose(
            [
                join(cfg.java_home, "bin", "java"),
                "-classpath", classpath,
                "-Djava.awt.headless=true",
                f"-Djpf.boot.config={boot_config}",
                f"-Dlog4j.configurationFile={log4j_config}",
                # not "log4j.configuration" as in the original! Argh.
                # http://logging.apache.org/log4j/2.x/manual/configuration.html
                f"-Dinput.uri={umls_metadir}",
                f"-Doutput.uri={umls_output_dir}",
                f"-Dmmsys.config.uri={config_file}",
                # Additional from run_linux.sh:
                "-client",  # JVM option: client rather than server mode
                "-Dunzip.native=true",
                f"-Dunzip.path={system_unzip}",
                "-Dfile.encoding=UTF-8",
                "-Xms1000M",  # was 300M, but it's 1000M in run_linux.sh
                "-Xmx2000M",  # was 1000M, but it's 2000M in run_linux.sh
                "org.java.plugin.boot.Boot"
            ],
            env=umls_env)
    # -------------------------------------------------------------------------
    log.info("Converting UMLS data to Bio-YODIE format...")
    # -------------------------------------------------------------------------
    # Bio-YODIE's build expects its input at a hard-coded path; symlink our
    # MMSYS output into place rather than copying it.
    os.symlink(src=umls_output_dir,
               dst=bioyodie_umls_input_dir,
               target_is_directory=True)
    with pushd(bioyodie_repo_dir):
        log.warning("The next step is also slow.")
        check_call_verbose([builder_script], env=bioyodie_env)
    # -------------------------------------------------------------------------
    log.info(f"Moving Bio-YODIE data to destination directory: "
             f"{cfg.dest_dir}")
    # -------------------------------------------------------------------------
    output_files = os.listdir(bioyodie_output_dir)
    if output_files:
        shutil.copytree(bioyodie_output_dir, cfg.dest_dir)
        # ... destination should not already exist
        # ... it will make intermediate directories happily
    else:
        log.error(f"No output files in {bioyodie_output_dir}! "
                  f"Did the Bio-YODIE preprocessor partly crash?")
def build_package() -> None:
    """
    Builds the package: creates a Python source distribution (with extras),
    assembles a Debian package from it, checks it with Lintian, then converts
    it to an RPM via ``alien`` and rewrites the RPM's dependencies with
    ``rpmrebuild``.
    """
    log.info("Building Python package")
    setup_py = join(SRCSERVERDIR, 'setup.py')
    sdist_basefilename = ('camcops_server-{}.tar.gz'.format(MAINVERSION))
    src_sdist_file = join(SRCSERVERDIR, 'dist', sdist_basefilename)
    wrk_sdist_file = join(WRKBASEDIR, sdist_basefilename)
    # Remove any stale sdist first; ignore "file not found".
    try:
        log.info("Deleting old {} if it exists", src_sdist_file)
        os.remove(src_sdist_file)
    except OSError:
        pass
    os.chdir(SETUP_PY_DIR)  # or setup.py looks in wrong places?
    cmdargs = ['python', setup_py, 'sdist', '--extras']  # special!
    call(cmdargs)
    # Strip the gzip timestamp so builds are reproducible.
    remove_gzip_timestamp(src_sdist_file)

    log.info("Making directories")
    mkdir_p(DEBDIR)
    mkdir_p(DEBOVERRIDEDIR)
    mkdir_p(PACKAGEDIR)
    mkdir_p(RPMTOPDIR)
    mkdir_p(WRKCONFIGDIR)
    mkdir_p(WRKCONSOLEFILEDIR)
    mkdir_p(WRKDIR)
    mkdir_p(WRKDOCDIR)
    mkdir_p(WRKMANDIR)
    mkdir_p(WRKMPLCONFIGDIR)
    mkdir_p(WRKBASEDIR)
    mkdir_p(WRKTOOLDIR)
    # Standard rpmbuild directory tree:
    for d in "BUILD,BUILDROOT,RPMS,RPMS/noarch,SOURCES,SPECS,SRPMS".split(","):
        mkdir_p(join(RPMTOPDIR, d))

    log.info("Copying files")
    write_gzipped_text(join(WRKDOCDIR, 'changelog.Debian'), get_changelog())
    copyglob(join(SRCTOOLDIR, VENVSCRIPT), WRKTOOLDIR)
    shutil.copyfile(src_sdist_file, wrk_sdist_file)

    log.info("Creating man page for camcops. "
             "Will be installed as " + DSTMANFILE)
    write_gzipped_text(WRKMANFILE_BASE, get_man_page_camcops_server())

    log.info("Creating man page for camcops_server_meta. "
             "Will be installed as " + DSTMETAMANFILE)
    write_gzipped_text(WRKMETAMANFILE_BASE,
                       get_man_page_camcops_server_meta())

    log.info("Creating links to documentation. "
             "Will be installed as " + DSTREADME)
    write_text(WRKREADME, get_readme())

    log.info("Creating camcops_server launch script. "
             "Will be installed as " + DSTCONSOLEFILE)
    write_text(WRKCONSOLEFILE, get_camcops_server_launcher())

    log.info("Creating camcops_server_meta launch script. "
             "Will be installed as " + DSTMETACONSOLEFILE)
    write_text(WRKMETACONSOLEFILE, get_camcops_server_meta_launcher())

    log.info("Creating Debian control file")
    write_text(join(DEBDIR, 'control'), get_debian_control())

    # Debian maintainer scripts (preinst/postinst/prerm/postrm):
    log.info("Creating preinst file. Will be installed as " +
             join(DSTDPKGDIR, PACKAGE_DEB_NAME + '.preinst'))
    write_text(join(DEBDIR, 'preinst'), get_preinst())

    log.info("Creating postinst file. Will be installed as " +
             join(DSTDPKGDIR, PACKAGE_DEB_NAME + '.postinst'))
    write_text(join(DEBDIR, 'postinst'), get_postinst(sdist_basefilename))

    log.info("Creating prerm file. Will be installed as " +
             join(DSTDPKGDIR, PACKAGE_DEB_NAME + '.prerm'))
    write_text(join(DEBDIR, 'prerm'), get_prerm())

    log.info("Creating postrm file. Will be installed as " +
             join(DSTDPKGDIR, PACKAGE_DEB_NAME + '.postrm'))
    write_text(join(DEBDIR, 'postrm'), get_postrm())

    log.info("Creating Lintian override file")
    write_text(join(DEBOVERRIDEDIR, PACKAGE_DEB_NAME), get_override())

    log.info("Creating copyright file. Will be installed as " +
             join(DSTDOCDIR, 'copyright'))
    write_text(join(WRKDOCDIR, 'copyright'), get_copyright())

    log.info("Setting ownership and permissions")
    call(['find', WRKDIR, '-type', 'd', '-exec', 'chmod', '755', '{}', ';'])
    # ... make directories executable: must do that first, or all the
    # subsequent recursions fail
    call(['find', WRKDIR, '-type', 'f', '-exec', 'chmod', '644', '{}', ';'])
    call([
        "chmod", "a+x",
        WRKCONSOLEFILE,
        WRKMETACONSOLEFILE,
        join(DEBDIR, 'prerm'),
        join(DEBDIR, 'postrm'),
        join(DEBDIR, 'preinst'),
        join(DEBDIR, 'postinst'),
    ])
    call(
        ['find', WRKDIR, '-iname', '*.py', '-exec', 'chmod', 'a+x', '{}',
         ';'])
    call(
        ['find', WRKDIR, '-iname', '*.pl', '-exec', 'chmod', 'a+x', '{}',
         ';'])

    log.info("Removing junk")
    call(['find', WRKDIR, '-name', '*.svn', '-exec', 'rm', '-rf', '{}', ';'])
    call(['find', WRKDIR, '-name', '.git', '-exec', 'rm', '-rf', '{}', ';'])
    call([
        'find', WRKDOCDIR, '-name', 'LICENSE', '-exec', 'rm', '-rf', '{}',
        ';'
    ])

    log.info("Building package")
    call(['fakeroot', 'dpkg-deb', '--build', WRKDIR, PACKAGENAME])
    # ... "fakeroot" prefix makes all files installed as root:root

    log.info("Checking with Lintian")
    # NOTE(review): --fail-in-warnings was removed in lintian 2.62.0 (the
    # other variant of this function drops it); confirm the target lintian
    # version still accepts this flag.
    call(['lintian', '--fail-on-warnings', PACKAGENAME])

    log.info("Converting to RPM")
    call(['fakeroot', 'alien', '--to-rpm', '--scripts', PACKAGENAME],
         cwd=PACKAGEDIR)
    # see "man alien"
    # NOTE: needs to be run as root for correct final permissions
    expected_main_rpm_name = "{PACKAGE}-{MAINVERSION}-2.noarch.rpm".format(
        PACKAGE=PACKAGE_DEB_NAME,
        MAINVERSION=MAINVERSION,
    )
    full_rpm_path = join(PACKAGEDIR, expected_main_rpm_name)
    myuser = getpass.getuser()
    shutil.chown(full_rpm_path, myuser, myuser)

    log.info("Changing dependencies within RPM")
    # Alien does not successfully translate the dependencies, and anyway the
    # names for packages are different on CentOS. A dummy prerequisite package
    # works (below) but is inelegant.
    # The rpmbuild commands are filters (text in via stdin, text out to
    # stdout), so replacement just needs the echo command.
    depends_rpm = get_lines_without_comments(RPM_REQ_FILE)
    echoparam = repr("Requires: {}".format(" ".join(depends_rpm)))
    call([
        'rpmrebuild',
        '--define', '_topdir ' + RPMTOPDIR,
        '--package',
        '--change-spec-requires=/bin/echo {}'.format(echoparam),
        full_rpm_path,
    ])
    # ... add "--edit-whole" as the last option before the RPM name to see
    # what you're getting
    # ... define topdir, or it builds in ~/rpmbuild/...
    # ... --package, or it looks for an installed RPM rather than a package
    # file
    # ... if echo parameter has brackets in, ensure it's quoted
    shutil.move(join(RPMTOPDIR, 'RPMS', 'noarch', expected_main_rpm_name),
                join(PACKAGEDIR, expected_main_rpm_name))
    # ... will overwrite its predecessor

    log.info("Deleting temporary workspace")
    shutil.rmtree(TMPDIR, ignore_errors=True)  # CAUTION!

    # Done
    log.info("=" * 79)
    log.info("Debian package should be: " + PACKAGENAME)
    log.info("RPM should be: " + full_rpm_path)
def ensure_directories_exist() -> None:
    """
    Ensure that the export lock directory exists, and (if one is configured)
    the user download directory too.
    """
    cfg = get_default_config_from_os_env()
    mkdir_p(cfg.export_lockdir)
    # The download directory is optional; only create it if configured.
    if cfg.user_download_dir:
        mkdir_p(cfg.user_download_dir)