Exemple #1
0
def _get_dm_api(user_id=None):
    """
    Create a DM API handle, picking live or test mode.

    The configuration file is looked up as ``mongodb.cnf`` in the directory
    that contains this module.

    Parameters
    ----------
    user_id : str (Optional)
        When set to 'test' the handle is always created with ``test=True``.

    Returns
    -------
    dmp
        ``dmp(cnf_loc)`` when ``user_id`` is not 'test' and the configuration
        file exists, otherwise ``dmp(cnf_loc, test=True)``.
    """
    config_path = os.path.dirname(os.path.abspath(__file__)) + '/mongodb.cnf'

    # Only a non-test user with a configuration file on disk gets the live API
    if user_id != 'test' and os.path.isfile(config_path):
        print("LIVE DM API")
        return dmp(config_path)

    print("TEST USER DM API" if user_id == 'test' else "TEST DM API")
    return dmp(config_path, test=True)
Exemple #2
0
    def __init__(self, user_id, file_id, cnf_loc=''):
        """
        Initialise the module and open the requested file with pyBigWig

        Parameters
        ----------
        user_id : str
            Identifier to uniquely locate the users files. Can be set to
            "common" if the files can be shared between users or 'test' for a
            dummy file
        file_id : str
            Identifier handed to the DM API to look up the file record
        cnf_loc : str (Optional)
            Passed to dmp() when the user is not 'test'
        """
        if user_id != 'test':
            # Resolve the path of the file through the DM API
            dm_api = dmp(cnf_loc)
            record = dm_api.get_file_by_id(user_id, file_id)
            self.file_handle = pyBigWig.open(record["file_path"], 'r')
            return

        # Test user: open the sample file, running the sample generator first
        # when the file is not on disk
        sample_path = os.path.join(
            os.path.dirname(__file__), "../tests/data/sample.bb")
        if not os.path.isfile(sample_path):
            GenerateSampleBigBed().main()
        self.file_handle = pyBigWig.open(sample_path, 'r')
    def __init__(self, user_id, file_id, cnf_loc=''):
        """
        Initialise the module and open the file handle

        Parameters
        ----------
        user_id : str
            Identifier to uniquely locate the users files. Can be set to
            "common" if the files can be shared between users or 'test' for a
            dummy file
        file_id : str
            Identifier handed to the DM API to look up the file record
        cnf_loc : str (Optional)
            Passed to dmp() when the user is not 'test'
        """
        if user_id == 'test':
            handle = pysam.TabixFile(self.test_file_gz)
        else:
            # NOTE(review): this branch opens the file with pyBigWig while the
            # test branch uses pysam.TabixFile - confirm that is intended
            record = dmp(cnf_loc).get_file_by_id(user_id, file_id)
            handle = pyBigWig.open(record['file_path'], 'r')

        self.file_handle = handle
    def __init__(self, user_id, file_id, cnf_loc=''):
        """
        Initialise the module and open the requested file with pyBigWig

        Parameters
        ----------
        user_id : str
            Identifier to uniquely locate the users files. Can be set to
            "common" if the files can be shared between users or 'test' for a
            dummy file
        file_id : str
            Identifier handed to the DM API to look up the file record
        cnf_loc : str (Optional)
            Passed to dmp() when the user is not 'test'
        """
        if user_id == 'test':
            # Sample file used for the test user; the sample generator is run
            # first when the file is not on disk
            bigwig_path = os.path.join(
                os.path.dirname(__file__), "../tests/data/sample.bw")
            if not os.path.isfile(bigwig_path):
                GenerateSampleBigWig().main()
        else:
            # Resolve the path of the file through the DM API
            record = dmp(cnf_loc).get_file_by_id(user_id, file_id)
            bigwig_path = record['file_path']

        self.file_handle = pyBigWig.open(bigwig_path, 'r')
Exemple #5
0
def test_loading():
    """
    Load a batch of dummy file entries into the test DM API

    One entry is registered for every combination of file type, data type and
    compression, each for a randomly chosen user. For RNA-seq fastq entries a
    coin flip decides whether a second 'bam' entry is registered as well; the
    id of the fastq entry is passed along in a list (presumably as its source
    file - confirm against dmp.set_file). The id of every registered entry is
    printed, followed by the number of files held by each user.
    """
    users = ["adam", "ben", "chris", "denis", "eric"]
    file_types = [
        "fastq", "fa", "fasta", "bam", "bed", "hdf5", "tsv", "wig", "pdb"
    ]
    data_types = ['RNA-seq', 'MNase-Seq', 'ChIP-seq', 'WGBS', 'HiC']
    compressed = [None, 'gzip', 'zip']

    dm_handle = dmp(test=True)

    # Same nesting order as three nested loops: file type, data type, zipping
    combinations = (
        (ftype, dtype, packing)
        for ftype in file_types
        for dtype in data_types
        for packing in compressed
    )

    for counter, (ftype, dtype, packing) in enumerate(combinations):
        owner = random.choice(users)
        path_stem = '/tmp/test/' + dtype + '/test_' + str(counter)

        source_id = dm_handle.set_file(
            owner, path_stem + '.' + ftype, 'file', ftype, 64000, None,
            dtype, 9606, packing,
            meta_data={'assembly': 'GCA_0123456789'})
        print(source_id)

        # The coin is only flipped for RNA-seq fastq entries
        is_rnaseq_fastq = dtype == 'RNA-seq' and ftype == 'fastq'
        if is_rnaseq_fastq and random.choice([0, 1]) == 1:
            bam_meta = {'assembly': 'GCA_0123456789', 'tool': 'kallisto'}
            bam_id = dm_handle.set_file(
                owner, path_stem + '.bam', 'file', 'bam', 64000, None,
                dtype, 9606, None, [source_id], meta_data=bam_meta)
            print(bam_id)

    for owner in users:
        owned_files = dm_handle.get_files_by_user(owner)
        print(owner, len(owned_files))
def test_files_by_user_rest():
    """
    Check that no entry listed for a test user contains a 'file_path' key

    Every known test user is queried with the second argument of
    get_files_by_user() set to True.
    """
    dm_handle = dmp(test=True)

    for user in ("adam", "ben", "chris", "denis", "eric", "test"):
        for entry in dm_handle.get_files_by_user(user, True):
            assert 'file_path' not in entry
def test_files_by_user():
    """
    Check that the files of every known test user are returned as a list
    """
    dm_handle = dmp(test=True)

    for user in ("adam", "ben", "chris", "denis", "eric", "test"):
        user_files = dm_handle.get_files_by_user(user)
        print(user, len(user_files))
        assert isinstance(user_files, list)
Exemple #8
0
def test_history():
    """
    Check that the history of every 'bam' file of each test user is a list
    """
    dm_handle = dmp(test=True)

    for user in ("adam", "ben", "chris", "denis", "eric"):
        for bam_entry in dm_handle.get_files_by_file_type(user, 'bam'):
            history = dm_handle.get_file_history(user, bam_entry['_id'])
            assert isinstance(history, list)
def test_files_by_type():
    """
    Check the result type when retrieving files by file type

    Ten random (user, file type) pairs are queried; each result has to be
    either a list or a dict.
    """
    users = ["adam", "ben", "chris", "denis", "eric"]
    file_types = ["fastq", "fasta", "bam", "bed", "hdf5", "tsv", "wig", "pdb"]

    dm_handle = dmp(test=True)

    for _ in range(10):
        # The user is drawn before the file type
        picked_user, picked_type = (
            random.choice(users), random.choice(file_types))
        found = dm_handle.get_files_by_file_type(picked_user, picked_type)
        assert isinstance(found, (list, dict))
def test_files_by_user():
    """
    Check that removing a file leaves the user with exactly one file fewer
    """
    user = "******"

    dm_handle = dmp(test=True)

    files_before = dm_handle.get_files_by_user(user)
    assert isinstance(files_before, list)

    # Take the count before the removal so it cannot be affected by it
    count_before = len(files_before)

    dm_handle.remove_file(user, files_before[0]['_id'])
    files_after = dm_handle.get_files_by_user(user)

    assert count_before - 1 == len(files_after)
Exemple #11
0
    def __init__(self, user_id, file_id, resolution=None, cnf_loc=''):
        """
        Initialise the module and open the HDF5 file

        Parameters
        ----------
        user_id : str
            Identifier to uniquely locate the users files. Can be set to
            "common" if the files can be shared between users. For 'test' the
            sample file is opened instead of a file from the DM API
        file_id : str
            Identifier handed to the DM API to look up the file record
        resolution : int (Optional)
            Level of resolution. When omitted, the first of the resolutions
            found in the file is used
        cnf_loc : str (Optional)
            Passed to dmp() when the user is not 'test'
        """
        self.file_id = file_id  # file_id required later on for URL generation

        if user_id == 'test':
            # The sample generator is run first when the file is not on disk
            hdf5_path = os.path.join(
                os.path.dirname(__file__),
                "../tests/data/sample_adjacency.hdf5")
            if not os.path.isfile(hdf5_path):
                GenerateSampleAdjacency().main()
        else:
            record = dmp(cnf_loc).get_file_by_id(user_id, file_id)
            hdf5_path = record["file_path"]

        self.hdf5_handle = h5py.File(hdf5_path, "r")

        # The top level keys of the file are read as integer resolutions
        self.resolutions = [int(key) for key in self.hdf5_handle.keys()]

        selected = self.resolutions[0] if resolution is None else resolution
        self.dset = self.hdf5_handle[str(selected)]
        self.resolution = selected

        self.chr_param = self._calculate_chr_param(
            self.resolutions, self.dset.attrs["chromosomes"])
def test_files_by_id():
    """
    Check that files can be retrieved by their id

    Each of the four known ids has to resolve to a dict that contains a
    'file_path' key.
    """
    user = "******"
    known_ids = [
        ObjectId(str(hex_id))
        for hex_id in (
            "0123456789ab0123456789aa",
            "0123456789ab0123456789ab",
            "0123456789ab0123456789ac",
            "0123456789ab0123456789ad",
        )
    ]

    dm_handle = dmp(test=True)

    for known_id in known_ids:
        entry = dm_handle.get_file_by_id(user, known_id)
        print("DMP RESULTS:", entry)
        assert isinstance(entry, dict)
        assert 'file_path' in entry
Exemple #13
0
def test_files_by_user():
    """
    Check that a piece of meta data can be added to, and then removed from,
    a pre-existing file within the DM API.
    """
    user = "******"
    meta_key = 'test'

    dm_handle = dmp(test=True)

    user_files = dm_handle.get_files_by_user(user)
    assert isinstance(user_files, list)

    # Attach the key to the first file of the user and read the entry back
    target_id = dm_handle.add_file_metadata(
        user, user_files[0]['_id'], meta_key, 'An example string')
    with_key = dm_handle.get_file_by_id(user, target_id)
    assert meta_key in with_key['meta_data'].keys()

    # Drop the key again and make sure it is gone
    dm_handle.remove_file_metadata(user, target_id, meta_key)
    without_key = dm_handle.get_file_by_id(user, target_id)
    assert meta_key not in without_key['meta_data'].keys()
def test_files_by_file_path_rest():
    """
    Check the retrieval of files by their file path

    Each sample file has to match exactly one entry, and the entries returned
    (third argument of get_file_by_file_path() set to True) must not contain
    a 'file_path' key.
    """
    user = "******"
    here = os.path.dirname(__file__)
    file_paths = [
        os.path.realpath(os.path.join(here, relative))
        for relative in ("../tests/data/sample.bb", "../tests/data/sample.bw")
    ]

    dm_handle = dmp(test=True)

    for file_path in file_paths:
        matches = dm_handle.get_file_by_file_path(user, file_path, True)

        assert isinstance(matches, list)
        assert len(matches) == 1

        for entry in matches:
            assert 'file_path' not in entry
    def __init__(self, user_id, file_id, resolution=None, cnf_loc=''):
        """
        Initialise the module and set the required base parameters

        Parameters
        ----------
        user_id : str
            Identifier to uniquely locate the users files. Can be set to
            "common" if the files can be shared between users. For 'test' the
            sample file is opened instead of a file from the DM API
        file_id : str
            Identifier handed to the DM API to look up the file record
        resolution : int (Optional)
            Level of resolution. When it is None only the file is opened; the
            group, meta and data attributes of the instance are not set
        cnf_loc : str (Optional)
            Passed to dmp() when the user is not 'test'
        """

        self.file_handle = None

        # Work out which hdf5 file to open
        if user_id == 'test':
            # The sample generator is run first when the file is not on disk
            hdf5_path = os.path.join(
                os.path.dirname(__file__),
                "../tests/data/sample_coords.hdf5"
            )
            if not os.path.isfile(hdf5_path):
                GenerateSampleCoords().main()
        else:
            record = dmp(cnf_loc).get_file_by_id(user_id, file_id)
            hdf5_path = record['file_path']

        self.file_handle = h5py.File(hdf5_path, 'r')

        self.resolution = resolution
        if self.resolution is None:
            return

        self.grp = self.file_handle[str(self.resolution)]
        self.meta = self.grp['meta']
        self.mpgrp = self.meta['model_params']
        self.clusters = self.meta['clusters']
        self.centroids = self.meta['centroids']

        data_attrs = self.grp['data'].attrs

        # (instance attribute, hdf5 attribute key, factory for the fallback)
        json_attributes = (
            ('dependencies', 'dependencies', list),
            ('meta_data', 'TADbit_meta', dict),
            ('hic_data', 'hic_data', dict),
            ('restraints', 'restraints', dict),
        )
        for attr_name, hdf5_key, make_empty in json_attributes:
            if hdf5_key in data_attrs:
                value = json.loads(data_attrs[hdf5_key])
            else:
                value = make_empty()
            setattr(self, attr_name, value)
 pcs = process_chipseq()
 cf = common()

 #
 # MuG Tool Steps
 # --------------
 #
 # 1. Create data files

 # Get the assembly
 #genome_fa = cf.getGenomeFromENA(data_dir, species, assembly, False)

 # NOTE(review): genome_fa, file_loc and file_bg_loc are not assigned in this
 # section (the genome_fa assignment above is commented out) - confirm they
 # are defined earlier in the script.

 # 2. Register the data with the DMP
 from dmp import dmp

 da = dmp()

 # Files held by the test user before anything is registered.
 # print() is called with a single argument so the output is the same under
 # Python 2 and Python 3.
 print(da.get_files_by_user("test"))

 genome_file = da.set_file("test", genome_fa, "fasta", "Assembly", 9606, None)
 file_in = da.set_file("test", file_loc, "fasta", "ChIP-seq", 9606, None)
 file_bg_in = da.set_file("test", file_bg_loc, "fasta", "ChIP-seq", 9606, None)

 # Files held by the test user after the three inputs have been registered
 print(da.get_files_by_user("test"))

 # 3. Instantiate and launch the App
 from basic_modules import WorkflowApp
 app = WorkflowApp()
 results = app.launch(process_chipseq, [genome_file, file_in, file_bg_in], {})

 # Files held by the test user once the workflow has been launched
 print(da.get_files_by_user("test"))
        'resolutions': RESOLUTIONS,
        'enzyme_name': ENZYME_NAME,
        'windows1': WINDOWS1,
        'windows2': WINDOWS2,
        'normalized': NORMALIZED,
        'hdf5': True,
        'expt_name': EXPT_NAME,
        'window_type': WINDOW_TYPE
    }

    #
    # MuG Tool Steps
    # --------------
    #
    # 1. Create data files
    DM_HANDLER = dmp(test=True)

    #2. Register the data with the DMP
    genome_file = DM_HANDLER.set_file("test",
                                      GENOME_FA,
                                      "fasta",
                                      "Assembly",
                                      TAXON_ID,
                                      meta_data={'assembly': ASSEMBLY})
    genome_idx = DM_HANDLER.set_file("test",
                                     GENOME_GEM,
                                     "gem",
                                     "Assembly Index",
                                     TAXON_ID,
                                     meta_data={'assembly': ASSEMBLY})
    fastq_01_file_in = DM_HANDLER.set_file("test",