Esempio n. 1
0
    def merge(self, dependencies):
        """Combine these dependencies with another set of dependencies.

        Typical use: joining the dependencies of a model with those of
        its dataloader.

        Args:
            dependencies: the other Dependencies instance

        Returns:
            a newly constructed Dependencies instance. The conda packages
            and conda channels of both inputs are concatenated (self first)
            and passed through `unique_list`; the concatenated pip packages
            are passed through `kconda.normalize_pip`.
        """
        # Concatenate copies so the attributes of both inputs are only read
        all_conda = list(self.conda) + list(dependencies.conda)
        all_pip = list(self.pip) + list(dependencies.pip)
        all_channels = list(self.conda_channels) + list(dependencies.conda_channels)

        return Dependencies(conda=unique_list(all_conda),
                            pip=kconda.normalize_pip(all_pip),
                            conda_channels=unique_list(all_channels))
Esempio n. 2
0
    def _get_channels_packages(self):
        """Get conda channels and packages separated from each other (by '::')

        Every entry of `self.conda` is passed to `kconda.parse_conda_package`,
        which is expected to return a (channel, package) pair. The parsed
        channels are combined with `self.conda_channels` and de-duplicated
        with `unique_list`; the channel order is then adjusted:

        - if 'bioconda' is present and 'conda-forge' is not, 'conda-forge'
          is inserted right after 'bioconda'
        - if 'pysam' is requested, 'bioconda' is present and 'defaults' is
          listed before 'bioconda', 'defaults' is moved to the end

        Returns:
            tuple (channels, packages). If `self.conda` is empty,
            `self.conda_channels` and `self.conda` are returned as they
            are, without any re-ordering.
        """
        if len(self.conda) == 0:
            return self.conda_channels, self.conda
        channels, packages = list(zip(*map(kconda.parse_conda_package, self.conda)))
        channels = unique_list(list(channels) + list(self.conda_channels))
        packages = unique_list(list(packages))

        # Handle channel order
        if "bioconda" in channels and "conda-forge" not in channels:
            # Insert 'conda-forge' right after bioconda if it is not included
            channels.insert(channels.index("bioconda") + 1, "conda-forge")
        if "pysam" in packages and "bioconda" in channels:
            # 'defaults' may be absent (e.g. every package carries an explicit
            # channel prefix); list.index would raise ValueError in that case,
            # so only attempt the swap when 'defaults' is actually listed.
            if ("defaults" in channels
                    and channels.index("defaults") < channels.index("bioconda")):
                logger.warning("Swapping channel order - putting defaults last. " +
                               "Using pysam bioconda instead of anaconda")
                channels.remove("defaults")
                channels.append("defaults")
        return channels, packages
Esempio n. 3
0
def involved_directories(dataloader_kwargs, output_files=None, exclude_dirs=None):
    """Infer the involved directories given dataloader kwargs

    Args:
        dataloader_kwargs: dict of dataloader keyword arguments. Values that
            are path-like (str, bytes or os.PathLike) and point to an
            existing path contribute the directory containing that path.
            All other values (numbers, None, lists, ...) are ignored.
        output_files: optional iterable of output file paths. The directory
            of each file is always included, whether or not the file exists.
        exclude_dirs: optional iterable of directories. A collected directory
            is dropped if `is_subdir` reports it as lying within any of them
            ('~' in these entries is expanded first).

    Returns:
        The collected absolute directory paths passed through `unique_list`.
    """
    # None sentinels instead of mutable default arguments
    output_files = [] if output_files is None else output_files
    exclude_dirs = [] if exclude_dirs is None else exclude_dirs

    dirs = []
    # dataloader kwargs
    for v in dataloader_kwargs.values():
        # Kwargs frequently hold non-path values. os.path.exists raises
        # TypeError for most of them and interprets ints as file
        # descriptors, so only path-like values are inspected.
        if not isinstance(v, (str, bytes, os.PathLike)):
            continue
        if os.path.exists(v):
            dirs.append(os.path.dirname(os.path.abspath(v)))

    # output files
    for v in output_files:
        dirs.append(os.path.dirname(os.path.abspath(v)))

    # optionally exclude directories
    def in_any_dir(fname, excluded):
        return any(is_subdir(fname, os.path.expanduser(d))
                   for d in excluded)

    dirs = [x for x in dirs
            if not in_any_dir(x, exclude_dirs)]

    return unique_list(dirs)
Esempio n. 4
0
 def fn(x):
     # Summarise the rows of `x` into a single pd.Series.
     # NOTE(review): the previous comment here read "remove the templates";
     # nothing in this function visibly removes anything - confirm intent.

     def flattened(nested):
         # Flatten one level: an iterable of iterables -> flat list
         return [item for group in nested for item in group]

     summary = OrderedDict()
     summary["N_models"] = x.shape[0]
     summary["N_subgroups"] = n_subgroups(x.child.fillna(""))
     summary["is_group"] = x.is_group.any()
     summary["authors"] = unique_list(flattened(x.authors))
     summary["contributors"] = unique_list(flattened(x.contributors))
     summary["veff_score_variants"] = x.veff_score_variants.any()
     summary["type"] = unique_list(list(x.type))
     summary["license"] = unique_list(list(x.license))
     # Entries without a citation (None) are skipped
     summary["cite_as"] = unique_list([ref for ref in x.cite_as
                                       if ref is not None])
     summary["tags"] = unique_list(flattened(x.tags))
     return pd.Series(summary)
Esempio n. 5
0
def align_clustered_patterns(patterns, cluster_order, cluster,
                             align_track='contrib/mean',
                             metric='continousjaccard',
                             max_shift=30,
                             trials=10):
    """Align clustered patterns

    The major patterns (as returned by `get_major_patterns`) are aligned
    with `optimal_align_patterns`; afterwards every pattern is aligned to
    the major pattern of its own cluster.

    NOTE(review): earlier documentation stated that, in addition to the
    normal features under p.attrs['features'], logo_contrib, logo_seq,
    profile scores and the directness score are added. That is not visible
    in this function - confirm where it happens.

    Args:
      patterns: list of patterns
      cluster_order: order rank for each pattern id. e.g. cluster_order[1] = 5
        means that patterns[1] needs to be at position 5 at the end
      cluster: cluster identity for each pattern (indexed with an integer
        index array, so presumably a numpy array - TODO confirm)
      align_track: which track to use for alignment
      metric: metric name forwarded to both alignment steps
      max_shift: maximum allowed shift of the motif
      trials: how many times to run the alignment algorithm with different ordering

    Returns: list of motifs of the same length as patterns. They are ordered
      as they should be plotted in the table. Also 'cluster' field is added
    """
    # Pattern indices in their final (plotting) order
    plot_order = np.argsort(cluster_order)

    # Clusters in order of first appearance along the plotting order
    cluster_order_ind = unique_list(cluster[plot_order])

    # 1. Get the major patterns and align them to each other
    representatives = get_major_patterns(patterns, cluster)
    major_patterns = optimal_align_patterns(representatives,
                                            cluster_order_ind,
                                            max_shift=max_shift,
                                            metric=metric,
                                            trials=trials,
                                            align_track=align_track)

    # 2. Align each pattern to the major pattern of its cluster and
    #    record the cluster it belongs to
    aligned = []
    for idx in tqdm(plot_order):
        cluster_id = cluster[idx]
        shifted = patterns[idx].align(major_patterns[cluster_id],
                                      metric=metric,
                                      track=align_track)
        aligned.append(shifted.add_attr('cluster', cluster_id))
    return aligned