def upload(self, hub_only=False, host=None, user=None, rsync_options=None,
           hub_remote=None):
    """
    Render the hub and upload it to the configured server.

    Unless ``hub_only`` is True, every Track in the hub is uploaded as
    well.  Explicit arguments take precedence; any left as None (or
    otherwise falsy) falls back to the group's 'server' configuration
    section, when one exists.
    """
    self.process()

    # Fill in unspecified connection settings from the group config.
    if 'server' in self.group:
        server = self.group['server']
        host = host or server.get('host')
        user = user or server.get('user')
        rsync_options = rsync_options or server.get('rsync_options')
        hub_remote = hub_remote or server.get('hub_remote')

    self.hub.remote_fn = hub_remote
    self.hub.remote_dir = os.path.dirname(hub_remote)
    self.hub.render()

    # Allow the literal string '$USER' in config to mean "current user".
    if user == '$USER':
        user = os.environ.get('USER')

    common = dict(host=host, user=user, rsync_options=rsync_options)
    upload_hub(hub=self.hub, **common)
    if not hub_only:
        for track, level in self.hub.leaves(Track):
            upload_track(track=track, **common)
    log("Hub can now be accessed via {0}".format(self.hub.url),
        style=Fore.BLUE)
def upload(self, hub_only=False, host=None, user=None, rsync_options=None,
           hub_remote=None):
    """
    Render and upload the hub, then (unless ``hub_only``) each track.

    Arguments not provided by the caller fall back to the values in the
    group's 'server' section, if present.
    """
    self.process()

    server_cfg = self.group['server'] if 'server' in self.group else None
    if server_cfg is not None:
        host = host or server_cfg.get('host')
        user = user or server_cfg.get('user')
        rsync_options = rsync_options or server_cfg.get('rsync_options')
        hub_remote = hub_remote or server_cfg.get('hub_remote')

    self.hub.remote_fn = hub_remote
    self.hub.remote_dir = os.path.dirname(hub_remote)
    self.hub.render()

    # A configured user of '$USER' is expanded to the invoking user.
    if user == '$USER':
        user = os.environ.get('USER')

    kwargs = dict(host=host, user=user, rsync_options=rsync_options)
    upload_hub(hub=self.hub, **kwargs)
    if not hub_only:
        for track, level in self.hub.leaves(Track):
            upload_track(track=track, **kwargs)
    log("Hub can now be accessed via {0}".format(self.hub.url),
        style=Fore.BLUE)
def _build_metadata(self):
    """
    If metadata-builder.py exists in this study's directory, run it to
    (re)generate metadata.yaml.

    Any pre-existing metadata.yaml is first backed up to
    metadata.yaml.bak.

    Raises
    ------
    ValueError
        If the builder script is not executable by its owner, or if it
        ran but did not create metadata.yaml.
    subprocess.CalledProcessError
        If the builder script exits nonzero.
    """
    builder = os.path.join(self.dirname, 'metadata-builder.py')
    if not os.path.exists(builder):
        return
    log("{0} exists. Running it...".format(builder))

    metadata = os.path.join(self.dirname, 'metadata.yaml')
    if os.path.exists(metadata):
        backup = os.path.join(self.dirname, 'metadata.yaml.bak')
        shutil.copy(metadata, backup)
        log("Existing {0} backed up to {1}".format(metadata, backup))

    if not (stat.S_IXUSR & os.stat(builder)[stat.ST_MODE]):
        raise ValueError(
            Fore.RED + "{0} not executable".format(builder) + Fore.RESET)

    # check_call raises CalledProcessError on nonzero exit, so its return
    # value (always 0) carries no information; don't bind it.
    subprocess.check_call(['./metadata-builder.py'], cwd=self.dirname)

    if not os.path.exists(metadata):
        raise ValueError("Expected {0} but was not created by {1}"
                         .format(metadata, builder))
def download_chainfile(source_assembly, target_assembly):
    """
    Return the local path of the chain file for lifting over from
    `source_assembly` to `target_assembly`, downloading it into the
    cache directory if it is not already there.

    The cache directory is taken from the HUBWARD_CACHE_DIR environment
    variable, defaulting to ~/.hubward_cache.
    """
    default_cache = os.path.expanduser('~/.hubward_cache')
    cache_dir = os.environ.get('HUBWARD_CACHE_DIR', default_cache)
    utils.makedirs(cache_dir)

    url = chainfile_url(source_assembly, target_assembly)
    dest = os.path.join(cache_dir, os.path.basename(url))
    if not os.path.exists(dest):
        log('Downloading {0} to {1}'.format(url, dest))
        utils.download(url, dest)
    return dest
def _download(self):
    """
    Download and unpack the source into `raw-data`.

    Raises
    ------
    ValueError
        If, after downloading and unpacking, self.original still does
        not exist.
    """
    log("Downloading '%s' -> '%s'" % (self.source_url, self.source_fn),
        indent=4)
    utils.download(self.source_url, self.source_fn)
    utils.unpack(self.source_fn, os.path.dirname(self.source_fn))
    if self._needs_download():
        # Bug fix: this previously read `self.source.fn`, which raised
        # AttributeError while trying to report the real failure.
        raise ValueError(
            "Downloading and unpacking '%s' did not result in '%s'"
            % (self.source_url, self.source_fn))
def _download(self):
    """
    Download and unpack the source into `raw-data`.

    Raises
    ------
    ValueError
        If, after downloading and unpacking, self.original still does
        not exist.
    """
    log(
        "Downloading '%s' -> '%s'" % (self.source_url, self.source_fn),
        indent=4)
    utils.download(self.source_url, self.source_fn)
    utils.unpack(self.source_fn, os.path.dirname(self.source_fn))
    if self._needs_download():
        # Bug fix: this previously read `self.source.fn`, which raised
        # AttributeError while trying to report the real failure.
        raise ValueError(
            "Downloading and unpacking '%s' did not result in '%s'"
            % (self.source_url, self.source_fn))
def liftover(self, from_assembly, to_assembly, newfile):
    """
    Lifts over the processed file to a new file, but only if needed.

    Uses a hidden sentinel file to indicate whether it's been lifted
    over.

    Parameters
    ----------
    from_assembly : str
        Existing data are in this assembly's coordinates

    to_assembly : str
        Lift over existing data to this assembly's coordinates

    newfile : str
        Target filename of the lifted-over data
    """
    if not from_assembly == self.genome:
        log(
            "{0} not from assembly {1}. Skipping liftover from {1} to {2} "
            "and simply copying the file as-is to {3}"
            .format(self.label, from_assembly, to_assembly, newfile)
        )
        shutil.copy(self.processed, newfile)
        # Bug fix: without this return, control fell through and ran the
        # liftover anyway, clobbering the as-is copy with data lifted
        # from the wrong source assembly.
        return

    if not self._needs_liftover(from_assembly, to_assembly, newfile):
        log("{0} is already lifted over and up-to-date. Skipping."
            .format(newfile))
        return

    tmp = tempfile.NamedTemporaryFile(delete=False).name
    log("Lift over {0} to {1}".format(self.processed, tmp))
    liftover.liftover(
        from_assembly, to_assembly, self.processed, tmp, self.type_)
    utils.makedirs(os.path.dirname(newfile))
    log("Moving {0} to {1}".format(tmp, newfile))
    shutil.move(tmp, newfile)
    if self.type_.lower() == 'bam':
        shutil.move(tmp + '.bai', newfile + '.bai')

    # CrossMap.py seems to `chmod go-rw` on lifted-over file. So we copy
    # permissions from the original one.
    shutil.copymode(self.processed, newfile)

    # Write the sentinel file to indicate genome we lifted over to.
    sentinel = self._liftover_sentinel(from_assembly, to_assembly, newfile)
    with open(sentinel, 'w') as fout:
        pass
def _build_metadata(self):
    """
    If metadata-builder.py exists in this study's directory, run it to
    (re)generate metadata.yaml.

    Any pre-existing metadata.yaml is first backed up to
    metadata.yaml.bak.

    Raises
    ------
    ValueError
        If the builder script is not executable by its owner, or if it
        ran but did not create metadata.yaml.
    subprocess.CalledProcessError
        If the builder script exits nonzero.
    """
    builder = os.path.join(self.dirname, 'metadata-builder.py')
    if not os.path.exists(builder):
        return
    log("{0} exists. Running it...".format(builder))

    metadata = os.path.join(self.dirname, 'metadata.yaml')
    if os.path.exists(metadata):
        backup = os.path.join(self.dirname, 'metadata.yaml.bak')
        shutil.copy(metadata, backup)
        log("Existing {0} backed up to {1}".format(metadata, backup))

    if not (stat.S_IXUSR & os.stat(builder)[stat.ST_MODE]):
        raise ValueError(Fore.RED + "{0} not executable".format(builder)
                         + Fore.RESET)

    # check_call raises CalledProcessError on nonzero exit, so its return
    # value (always 0) carries no information; don't bind it.
    subprocess.check_call(['./metadata-builder.py'], cwd=self.dirname)

    if not os.path.exists(metadata):
        raise ValueError("Expected {0} but was not created by {1}".format(
            metadata, builder))
def liftover(self, from_assembly, to_assembly, newfile):
    """
    Lifts over the processed file to a new file, but only if needed.

    Uses a hidden sentinel file to indicate whether it's been lifted
    over.

    Parameters
    ----------
    from_assembly : str
        Existing data are in this assembly's coordinates

    to_assembly : str
        Lift over existing data to this assembly's coordinates

    newfile : str
        Target filename of the lifted-over data
    """
    if not from_assembly == self.genome:
        log("{0} not from assembly {1}. Skipping liftover from {1} to {2} "
            "and simply copying the file as-is to {3}".format(
                self.label, from_assembly, to_assembly, newfile))
        shutil.copy(self.processed, newfile)
        # Bug fix: without this return, control fell through and ran the
        # liftover anyway, clobbering the as-is copy with data lifted
        # from the wrong source assembly.
        return

    if not self._needs_liftover(from_assembly, to_assembly, newfile):
        log("{0} is already lifted over and up-to-date. Skipping.".format(
            newfile))
        return

    tmp = tempfile.NamedTemporaryFile(delete=False).name
    log("Lift over {0} to {1}".format(self.processed, tmp))
    liftover.liftover(from_assembly, to_assembly, self.processed, tmp,
                      self.type_)
    utils.makedirs(os.path.dirname(newfile))
    log("Moving {0} to {1}".format(tmp, newfile))
    shutil.move(tmp, newfile)
    # Consistency with the other liftover variant in this file: BAM
    # output also gets a .bai index from CrossMap; move it alongside the
    # data so the index is not left behind in the temp location.
    if self.type_.lower() == 'bam':
        shutil.move(tmp + '.bai', newfile + '.bai')

    # CrossMap.py seems to `chmod go-rw` on lifted-over file. So we copy
    # permissions from the original one.
    shutil.copymode(self.processed, newfile)

    # Write the sentinel file to indicate genome we lifted over to.
    sentinel = self._liftover_sentinel(from_assembly, to_assembly, newfile)
    with open(sentinel, 'w') as fout:
        pass
def _needs_update(self):
    """
    Decide whether the processed file needs to be (re)generated.

    Returns True if the original data had to be downloaded, the
    processed file is missing, or either the processing script or the
    original data is newer than the processed file.  Files that were
    lifted over from another study are assumed up to date.
    """
    do_update = False

    if self._was_lifted_over():
        log(
            "This file appears to have been lifted over from another "
            "study, in which case we assume it does not need updating",
            style=Fore.YELLOW
        )
        return False

    if self._needs_download():
        # Bug fix: `indent=4` was previously passed to str.format (where
        # unused kwargs are silently ignored) instead of to log().
        log("{0.original} does not exist; downloading".format(self),
            indent=4)
        self._download()
        do_update = True

    if not os.path.exists(self.processed):
        log("{0.processed} does not exist".format(self), indent=4)
        do_update = True

    # if processed is a link, then check the LINK time
    if (
        os.path.exists(self.processed)
        and utils.link_is_newer(self.script, self.processed)
    ):
        log("{0.script} is newer than {0.processed}, need to re-run"
            .format(self), indent=4)
        do_update = True

    # but for the original data, we want to FOLLOW the link
    if (
        os.path.exists(self.original)
        and os.path.exists(self.processed)
        and utils.is_newer(self.original, self.processed)
    ):
        log("{0.original} is newer than {0.processed}, need to re-run"
            .format(self), indent=4)
        do_update = True

    if not do_update:
        log("{0.processed} is up to date"
            .format(self), indent=4, style=Style.DIM)
    return do_update
def process(self, force=False):
    """
    Process every track in this study.

    The ``force`` argument is accepted but currently unused here.
    """
    log('Study: {0.study[label]}, in "{0.dirname}"'.format(self),
        style=Fore.BLUE)
    for track in self.tracks:
        track.process()
def _needs_update(self):
    """
    Decide whether the processed file needs to be (re)generated.

    Returns True if the original data had to be downloaded, the
    processed file is missing, or either the processing script or the
    original data is newer than the processed file.  Files that were
    lifted over from another study are assumed up to date.
    """
    do_update = False

    if self._was_lifted_over():
        log(
            "This file appears to have been lifted over from another "
            "study, in which case we assume it does not need updating",
            style=Fore.YELLOW)
        return False

    if self._needs_download():
        # Bug fix: `indent=4` was previously passed to str.format (where
        # unused kwargs are silently ignored) instead of to log().
        log("{0.original} does not exist; downloading".format(self),
            indent=4)
        self._download()
        do_update = True

    if not os.path.exists(self.processed):
        log("{0.processed} does not exist".format(self), indent=4)
        do_update = True

    # if processed is a link, then check the LINK time
    if (os.path.exists(self.processed)
            and utils.link_is_newer(self.script, self.processed)):
        log("{0.script} is newer than {0.processed}, need to re-run".
            format(self), indent=4)
        do_update = True

    # but for the original data, we want to FOLLOW the link
    if (os.path.exists(self.original)
            and os.path.exists(self.processed)
            and utils.is_newer(self.original, self.processed)):
        log("{0.original} is newer than {0.processed}, need to re-run".
            format(self), indent=4)
        do_update = True

    if not do_update:
        log("{0.processed} is up to date".format(self), indent=4,
            style=Style.DIM)
    return do_update
def process(self, force=False):
    """
    Process every track in this study.

    The ``force`` argument is accepted but currently unused here.
    """
    log('Study: {0.study[description]}, in "{0.dirname}"'.format(self),
        style=Fore.BLUE)
    for track in self.tracks:
        track.process()