Example #1
    def upload(self,
               hub_only=False,
               host=None,
               user=None,
               rsync_options=None,
               hub_remote=None):
        self.process()

        if 'server' in self.group:
            host = host or self.group['server'].get('host')
            user = user or self.group['server'].get('user')
            rsync_options = rsync_options or self.group['server'].get(
                'rsync_options')
            hub_remote = hub_remote or self.group['server'].get('hub_remote')

        self.hub.remote_fn = hub_remote
        self.hub.remote_dir = os.path.dirname(hub_remote)

        self.hub.render()

        if user == '$USER':
            user = os.environ.get('USER')
        kwargs = dict(host=host, user=user, rsync_options=rsync_options)

        upload_hub(hub=self.hub, **kwargs)
        if not hub_only:
            for track, level in self.hub.leaves(Track):
                upload_track(track=track, **kwargs)

        log("Hub can now be accessed via {0}".format(self.hub.url),
            style=Fore.BLUE)
Example #2
    def upload(self, hub_only=False, host=None, user=None, rsync_options=None,
               hub_remote=None):
        self.process()

        if 'server' in self.group:
            host = host or self.group['server'].get('host')
            user = user or self.group['server'].get('user')
            rsync_options = rsync_options or self.group['server'].get('rsync_options')
            hub_remote = hub_remote or self.group['server'].get('hub_remote')

        self.hub.remote_fn = hub_remote
        self.hub.remote_dir = os.path.dirname(hub_remote)

        self.hub.render()

        if user == '$USER':
            user = os.environ.get('USER')
        kwargs = dict(host=host, user=user, rsync_options=rsync_options)

        upload_hub(hub=self.hub, **kwargs)
        if not hub_only:
            for track, level in self.hub.leaves(Track):
                upload_track(track=track, **kwargs)

        log("Hub can now be accessed via {0}"
            .format(self.hub.url), style=Fore.BLUE)
Example #3
    def _build_metadata(self):
        """
        If metadata-builder.py exists, always run it.
        """
        builder = os.path.join(self.dirname, 'metadata-builder.py')
        if not os.path.exists(builder):
            return

        log("{0} exists. Running it...".format(builder))
        metadata = os.path.join(self.dirname, 'metadata.yaml')
        if os.path.exists(metadata):
            backup = os.path.join(self.dirname, 'metadata.yaml.bak')
            shutil.copy(metadata, backup)
            log("Existing {0} backed up to {1}"
                .format(metadata, backup))

        if not (stat.S_IXUSR & os.stat(builder)[stat.ST_MODE]):
            raise ValueError(
                Fore.RED +
                "{0} not executable".format(builder) +
                Fore.RESET)
        cmds = ['./metadata-builder.py']
        retval = subprocess.check_call(cmds, cwd=self.dirname)

        if not os.path.exists(metadata):
            raise ValueError("Expected {0} but was not created by {1}"
                             .format(metadata, builder))
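
metadata-builder.py is expected to be an executable script (note the S_IXUSR check above) that writes metadata.yaml next to itself. A minimal, hypothetical builder might look like this; the YAML fields are purely illustrative:

#!/usr/bin/env python
# Hypothetical metadata-builder.py: writes metadata.yaml into its own
# directory. The fields below are illustrative only.
import os
import yaml

here = os.path.dirname(os.path.abspath(__file__))
metadata = {'study': {'label': 'example-study'}, 'tracks': []}
with open(os.path.join(here, 'metadata.yaml'), 'w') as fout:
    yaml.safe_dump(metadata, fout, default_flow_style=False)

It must also be marked executable (chmod +x metadata-builder.py); otherwise the check above raises a ValueError before running it.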
Example #4
def download_chainfile(source_assembly, target_assembly):
    """
    Download if needed, putting in the cache_dir.

    If the environment variable HUBWARD_CACHE_DIR is not set, then use
    ~/.hubward_cache.
    """
    cache_dir = os.environ.get(
        'HUBWARD_CACHE_DIR', os.path.expanduser('~/.hubward_cache'))
    utils.makedirs(cache_dir)
    url = chainfile_url(source_assembly, target_assembly)
    dest = os.path.join(cache_dir, os.path.basename(url))
    if not os.path.exists(dest):
        log('Downloading {0} to {1}'.format(url, dest))
        utils.download(url, dest)
    return dest
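
A short usage sketch; the assembly names and the cache location are assumptions, and the chain-file basename depends on whatever chainfile_url() returns:

# Use a throwaway cache directory and fetch (or reuse) a chain file for
# lifting dm3 coordinates over to dm6. Assembly names are examples only.
os.environ['HUBWARD_CACHE_DIR'] = '/tmp/hubward-cache'
chainfile = download_chainfile('dm3', 'dm6')
print(chainfile)  # e.g. /tmp/hubward-cache/dm3ToDm6.over.chain.gz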
Example #5
    def _download(self):
        """
        Downloads and unpacks the source to `raw-data`.

        After doing so, if self.original still does not exist, then raises
        a ValueError.
        """
        log("Downloading '%s' -> '%s'" % (self.source_url, self.source_fn),
            indent=4)
        utils.download(self.source_url, self.source_fn)
        utils.unpack(self.source_fn, os.path.dirname(self.source_fn))

        if self._needs_download():
            raise ValueError(
                "Downloading and unpacking '%s' did not result in '%s'" %
                (self.source_url, self.source_fn))
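
_needs_download is not shown in this snippet; judging from the docstring, it presumably just reports whether the expected original file exists yet. A hedged sketch:

    def _needs_download(self):
        # Assumed implementation: the raw data still needs downloading if the
        # expected original file is not on disk yet.
        return not os.path.exists(self.original)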
Example #6
    def _download(self):
        """
        Downloads and unpacks the source to `raw-data`.

        After doing so, if self.original still does not exist, then raises
        a ValueError.
        """
        log(
            "Downloading '%s' -> '%s'" %
            (self.source_url, self.source_fn), indent=4)
        utils.download(self.source_url, self.source_fn)
        utils.unpack(self.source_fn, os.path.dirname(self.source_fn))

        if self._needs_download():
            raise ValueError(
                "Downloading and unpacking '%s' did not result in '%s'"
                % (self.source_url, self.source_fn))
Example #7
    def liftover(self, from_assembly, to_assembly, newfile):
        """
        Lifts over the processed file to a new file, but only if needed.

        Uses a hidden sentinel file to indicate whether it's been lifted over.

        Parameters
        ----------

        from_assembly : str
            Existing data are in this assembly's coordinates

        to_assembly : str
            Lift over existing data to this assembly's coordinates

        newfile : str
            Target filename of the lifted-over data
        """

        if not from_assembly == self.genome:
            log(
                "{0} not from assembly {1}. Skipping liftover from {1} to {2} "
                "and simply copying the file as-is to {3}"
                .format(self.label, from_assembly, to_assembly, newfile)
            )
            shutil.copy(self.processed, newfile)
            return

        if not self._needs_liftover(from_assembly, to_assembly, newfile):
            log("{0} is already lifted over and up-to-date. Skipping."
                .format(newfile))
            return

        tmp = tempfile.NamedTemporaryFile(delete=False).name
        log("Lift over {0} to {1}".format(self.processed, tmp))
        liftover.liftover(
            from_assembly, to_assembly, self.processed, tmp, self.type_)
        utils.makedirs(os.path.dirname(newfile))
        log("Moving {0} to {1}".format(tmp, newfile))
        shutil.move(tmp, newfile)

        if self.type_.lower() == 'bam':
            shutil.move(tmp + '.bai', newfile + '.bai')

        # CrossMap.py seems to `chmod go-rw` on lifted-over file. So we copy
        # permissions from the original one.
        shutil.copymode(self.processed, newfile)

        # Write the sentinel file to indicate genome we lifted over to.
        sentinel = self._liftover_sentinel(from_assembly, to_assembly, newfile)
        with open(sentinel, 'w') as fout:
            pass
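
The two helpers referenced above, _liftover_sentinel and _needs_liftover, are not part of this snippet. A plausible sketch, assuming the sentinel is an empty hidden file whose name records which liftover produced the output:

    def _liftover_sentinel(self, from_assembly, to_assembly, newfile):
        # Assumed layout: a hidden, empty marker file next to the lifted-over
        # output, named after the assemblies involved.
        return os.path.join(
            os.path.dirname(newfile),
            '.{0}.{1}-to-{2}'.format(
                os.path.basename(newfile), from_assembly, to_assembly))

    def _needs_liftover(self, from_assembly, to_assembly, newfile):
        # Assumed check: re-run the liftover if the marker or the output is
        # missing, or if the processed file is newer than the output.
        sentinel = self._liftover_sentinel(from_assembly, to_assembly, newfile)
        if not (os.path.exists(sentinel) and os.path.exists(newfile)):
            return True
        return os.path.getmtime(self.processed) > os.path.getmtime(newfile)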
Example #8
    def _build_metadata(self):
        """
        If metadata-builder.py exists, always run it.
        """
        builder = os.path.join(self.dirname, 'metadata-builder.py')
        if not os.path.exists(builder):
            return

        log("{0} exists. Running it...".format(builder))
        metadata = os.path.join(self.dirname, 'metadata.yaml')
        if os.path.exists(metadata):
            backup = os.path.join(self.dirname, 'metadata.yaml.bak')
            shutil.copy(metadata, backup)
            log("Existing {0} backed up to {1}".format(metadata, backup))

        if not (stat.S_IXUSR & os.stat(builder)[stat.ST_MODE]):
            raise ValueError(Fore.RED + "{0} not executable".format(builder) +
                             Fore.RESET)
        cmds = ['./metadata-builder.py']
        retval = subprocess.check_call(cmds, cwd=self.dirname)

        if not os.path.exists(metadata):
            raise ValueError("Expected {0} but was not created by {1}".format(
                metadata, builder))
Example #9
    def liftover(self, from_assembly, to_assembly, newfile):
        """
        Lifts over the processed file to a new file, but only if needed.

        Uses a hidden sentinel file to indicate whether it's been lifted over.

        Parameters
        ----------

        from_assembly : str
            Existing data are in this assembly's coordinates

        to_assembly : str
            Lift over existing data to this assembly's coordinates

        newfile : str
            Target filename of the lifted-over data
        """

        if not from_assembly == self.genome:
            log("{0} not from assembly {1}. Skipping liftover from {1} to {2} "
                "and simply copying the file as-is to {3}".format(
                    self.label, from_assembly, to_assembly, newfile))
            shutil.copy(self.processed, newfile)
            return

        if not self._needs_liftover(from_assembly, to_assembly, newfile):
            log("{0} is already lifted over and up-to-date. Skipping.".format(
                newfile))
            return

        tmp = tempfile.NamedTemporaryFile(delete=False).name
        log("Lift over {0} to {1}".format(self.processed, tmp))
        liftover.liftover(from_assembly, to_assembly, self.processed, tmp,
                          self.type_)
        utils.makedirs(os.path.dirname(newfile))
        log("Moving {0} to {1}".format(tmp, newfile))
        shutil.move(tmp, newfile)

        # CrossMap.py seems to `chmod go-rw` on lifted-over file. So we copy
        # permissions from the original one.
        shutil.copymode(self.processed, newfile)

        # Write the sentinel file to indicate genome we lifted over to.
        sentinel = self._liftover_sentinel(from_assembly, to_assembly, newfile)
        with open(sentinel, 'w') as fout:
            pass
Example #10
    def _needs_update(self):
        """
        Decides if we need to update the processed file.
        """
        do_update = False
        if self._was_lifted_over():
            log(
                "This file appears to have been lifted over from another "
                "study, in which case we assume it does not need updating",
                style=Fore.YELLOW
            )
            return False
        if self._needs_download():
            log("{0.original} does not exist; downloading"
                .format(self, indent=4))
            self._download()
            do_update = True

        if not os.path.exists(self.processed):
            log("{0.processed} does not exist".format(self), indent=4)
            do_update = True

        # if processed is a link, then check the LINK time
        if (
            os.path.exists(self.processed) and
            utils.link_is_newer(self.script, self.processed)
        ):
            log("{0.script} is newer than {0.processed}, need to re-run"
                .format(self), indent=4)
            do_update = True

        # but for the original data, we want to FOLLOW the link
        if (
                os.path.exists(self.original) and
                os.path.exists(self.processed) and
                utils.is_newer(self.original, self.processed)
        ):
            log("{0.original} is newer than {0.processed}, need to re-run"
                .format(self), indent=4)
            do_update = True

        if not do_update:
            log("{0.processed} is up to date"
                .format(self), indent=4, style=Style.DIM)

        return do_update
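
The two timestamp helpers implement the distinction spelled out in the comments: for the processed file (typically a symlink created by the processing script) the link's own mtime is compared, while for the original data the link is followed to the actual file. A hedged sketch of what utils.link_is_newer and utils.is_newer might look like; the real implementations live in hubward.utils and may differ:

def link_is_newer(x, y):
    # Compare modification times WITHOUT following symlinks, so re-creating
    # a link counts as an update even if its target is unchanged.
    return os.lstat(x).st_mtime > os.lstat(y).st_mtime


def is_newer(x, y):
    # Compare modification times FOLLOWING symlinks, so the timestamp of the
    # actual data file is what matters.
    return os.stat(x).st_mtime > os.stat(y).st_mtime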
Example #11
    def process(self, force=False):
        log('Study: {0.study[label]}, in "{0.dirname}"'.format(self),
            style=Fore.BLUE)
        for d in self.tracks:
            d.process()
Example #12
    def _needs_update(self):
        """
        Decides if we need to update the processed file.
        """
        do_update = False
        if self._was_lifted_over():
            log(
                "This file appears to have been lifted over from another "
                "study, in which case we assume it does not need updating",
                style=Fore.YELLOW)
            return False
        if self._needs_download():
            log("{0.original} does not exist; downloading".format(self,
                                                                  indent=4))
            self._download()
            do_update = True

        if not os.path.exists(self.processed):
            log("{0.processed} does not exist".format(self), indent=4)
            do_update = True

        # if processed is a link, then check the LINK time
        if (os.path.exists(self.processed)
                and utils.link_is_newer(self.script, self.processed)):
            log("{0.script} is newer than {0.processed}, need to re-run".
                format(self),
                indent=4)
            do_update = True

        # but for the original data, we want to FOLLOW the link
        if (os.path.exists(self.original) and os.path.exists(self.processed)
                and utils.is_newer(self.original, self.processed)):
            log("{0.original} is newer than {0.processed}, need to re-run".
                format(self),
                indent=4)
            do_update = True

        if not do_update:
            log("{0.processed} is up to date".format(self),
                indent=4,
                style=Style.DIM)

        return do_update
Example #13
    def process(self, force=False):
        log('Study: {0.study[description]}, in "{0.dirname}"'.format(self),
            style=Fore.BLUE)
        for d in self.tracks:
            d.process()