예제 #1
0
import pytest
import numpy as np
from kipoi.data import Dataset
from kipoi.specs import DataLoaderArgument, DataLoaderSchema, DataLoaderDescription
from kipoi.specs import Author, Dependencies
from kipoi_utils.utils import inherits_from
from collections import OrderedDict
import related
from kipoi.data import kipoi_dataloader

# Requirements declared for the dataloader: a single pip package.
deps = Dependencies(pip='kipoiseq')

# Author entries; supplied as 'info.authors' in the decorator override below.
package_authors = [
    Author(name='John'),
]


@kipoi_dataloader(override={
    "dependencies": deps,
    'info.authors': package_authors
})
class Dl(Dataset):
    """
    info:
        doc: short doc
    args:
        arg1:
            doc: this is arg1
            example: hey
        n:
            doc: length of the dataset
    output_schema:
        inputs:
            name: seq
예제 #2
0
from kipoiseq.extractors import FastaStringExtractor
from kipoiseq.transforms import SwapAxes, DummyAxis, Compose, OneHot, ReorderedOneHot
from kipoiseq.transforms.functional import resize_interval
from kipoiseq.utils import to_scalar, parse_dtype

import pybedtools
from pybedtools import BedTool, Interval

# General dependencies of this module, split into conda and pip packages.
# TODO: add 'bioconda::genomelake' back once it is released on bioconda
# together with pyfaidx.
deps = Dependencies(
    conda=[
        'bioconda::pybedtools',
        'bioconda::pyfaidx',
        'numpy',
        'pandas',
    ],
    pip=['kipoiseq'],
)

# Author metadata entries (name plus GitHub handle).
package_authors = [
    Author(name='Ziga Avsec', github='avsecz'),
    Author(name='Roman Kreuzhuber', github='krrome'),
]

# Names exported by a star-import of this module.
__all__ = [
    'SeqIntervalDl',
    'StringSeqIntervalDl',
    'BedDataset',
]


class BedDataset(object):
    """Reads a tsv file in the following format:
    ```
    chr  start  stop  task1  task2 ...
    ```

    # Arguments
      tsv_file: tsv file type
예제 #3
0
from kipoi.data import Dataset, kipoi_dataloader
from kipoi.metadata import GenomicRanges
from kipoi.specs import Author, Dependencies
from kipoi.data import SampleIterator

import gffutils
from pyfaidx import Fasta
import pickle

# General dependencies of this module, split into conda and pip packages.
# TODO: add 'bioconda::genomelake' back once it is released on bioconda
# together with pyfaidx.
# NOTE(review): `gffutils` is imported above but is not listed here --
# confirm it is installed through some other route.
deps = Dependencies(
    conda=[
        'bioconda::pyfaidx',
        'numpy',
        'pandas',
    ],
    pip=[
        'kipoiseq',
        'kipoi',
    ],
)

# Author metadata entries (name plus GitHub handle).
package_authors = [
    Author(name='Jun Cheng', github='s6juncheng'),
]

# Names exported by a star-import of this module.
__all__ = [
    'ExonInterval',
    'generate_exons',
    'MMSpliceDl',
]

# ------------
# Python 2.7 compatibility shims.
#
# Each probe below merely evaluates the builtin name. Where the interpreter
# does not define it, the lookup raises NameError and the name is bound at
# module level to the broader exception class instead.

try:
    FileNotFoundError
except NameError:
    # Fall back to IOError when FileNotFoundError is not a builtin.
    FileNotFoundError = IOError

try:
    ModuleNotFoundError
except NameError:
    # Fall back to ImportError when ModuleNotFoundError is not a builtin.
    ModuleNotFoundError = ImportError
# ------------

예제 #4
0
from kipoi_conda.dependencies import Dependencies
from kipoiseq.transforms import ReorderedOneHot
from kipoi.specs import Author
from kipoi_utils.utils import default_kwargs
from kipoiseq.extractors import FastaStringExtractor
from kipoiseq.transforms.functional import resize_interval, one_hot_dna
from kipoiseq.utils import to_scalar, parse_dtype
from kipoiseq.dataclasses import Interval

# Declared requirements of this module, split into conda and pip packages.
deps = Dependencies(
    conda=[
        'bioconda::pybedtools',
        'bioconda::pyfaidx',
        'bioconda::pyranges',
        'numpy',
        'pandas',
    ],
    pip=['kipoiseq'],
)

# Author metadata entries (name plus GitHub handle).
# TODO(review): add Alex here?
package_authors = [
    Author(name='Ziga Avsec', github='avsecz'),
    Author(name='Roman Kreuzhuber', github='krrome'),
]

# Names exported by a star-import of this module.
__all__ = [
    'SeqIntervalDl',
    'StringSeqIntervalDl',
    'BedDataset',
    'AnchoredGTFDl',
]


class BedDataset(object):
    """Reads a tsv file in the following format:
    ```
    chr  start  stop  task1  task2 ...
    ```
예제 #5
0
# Names exported by a star-import of this module.
__all__ = [
    'SingleVariantProteinDataLoader',
    'SingleVariantUTRDataLoader',
]

# Declared requirements of this module, split into conda and pip packages.
deps = Dependencies(
    conda=[
        'bioconda::pybedtools',
        'bioconda::pyfaidx',
        'bioconda::pyranges',
        'bioconda::biopython',
        'numpy',
        'pandas',
    ],
    pip=['kipoiseq'],
)

# Author metadata entries (name plus GitHub handle).
package_authors = [
    Author(name='Florian R. Hölzlwimmer', github='hoeze'),
    Author(name='Kalin Nonchev', github='KalinNonchev'),
]


class SingleVariantProteinDataLoader(SampleIterator):
    """
    info:
        doc: >
            Dataloader for protein sequence models. With inputs as gtf annotation file and fasta file,
            each output is a protein sequence with flanking intronic seuqences. Intronic sequnce
            lengths specified by the users. Returned sequences are of the type np.array([str])
    type: SampleIterator
    args:
        gtf_file:
            doc: file path; Genome annotation GTF file