def _get_dm_api(user_id=None):
    """
    Create a DM API handle, choosing between live and test mode.

    The configuration file is expected at ``mongodb.cnf`` in the same
    directory as this module.

    Parameters
    ----------
    user_id : str
        (Optional) When equal to 'test' a test-mode handle is always returned.

    Returns
    -------
    object
        The result of calling ``dmp``. It is created with ``test=True`` for
        the 'test' user or when the configuration file does not exist, and
        from the configuration file alone otherwise.
    """
    cnf_loc = os.path.dirname(os.path.abspath(__file__)) + '/mongodb.cnf'

    # The dedicated test user never touches the live configuration.
    if user_id == 'test':
        print("TEST USER DM API")
        return dmp(cnf_loc, test=True)

    # Without a configuration file there is nothing live to connect to.
    if not os.path.isfile(cnf_loc):
        print("TEST DM API")
        return dmp(cnf_loc, test=True)

    print("LIVE DM API")
    return dmp(cnf_loc)
def __init__(self, user_id, file_id, cnf_loc=''):
    """
    Open the BigBed file for a user and keep the handle on the instance.

    Parameters
    ----------
    user_id : str
        Identifier to uniquely locate the users files. Can be set to
        "common" if the files can be shared between users or 'test' for a
        dummy file
    file_id : str
        Identifier of the file, passed to ``get_file_by_id`` of the DM API
        to look up its path. Not used for the 'test' user.
    cnf_loc : str
        (Optional) Passed to ``dmp`` when the file has to be looked up;
        presumably the location of the DM API configuration file.
    """
    if user_id == 'test':
        # The dummy file lives with the test data and is generated on
        # first use if it is missing.
        bigbed_path = os.path.join(
            os.path.dirname(__file__), "../tests/data/sample.bb")
        if not os.path.isfile(bigbed_path):
            GenerateSampleBigBed().main()
    else:
        bigbed_path = dmp(cnf_loc).get_file_by_id(user_id, file_id)["file_path"]

    self.file_handle = pyBigWig.open(bigbed_path, 'r')
def __init__(self, user_id, file_id, cnf_loc=''):
    """
    Open the tabix-indexed file for a user and keep the handle on the
    instance.

    Parameters
    ----------
    user_id : str
        Identifier to uniquely locate the users files. Can be set to
        "common" if the files can be shared between users or 'test' for a
        dummy file
    file_id : str
        Identifier of the file, passed to ``get_file_by_id`` of the DM API
        to look up its path. Not used for the 'test' user.
    cnf_loc : str
        (Optional) Passed to ``dmp`` when the file has to be looked up;
        presumably the location of the DM API configuration file.
    """
    if user_id == 'test':
        tabix_path = self.test_file_gz
    else:
        dm_handle = dmp(cnf_loc)
        file_obj = dm_handle.get_file_by_id(user_id, file_id)
        tabix_path = file_obj['file_path']

    # Both branches must produce the same kind of handle. Registered files
    # were previously opened with pyBigWig.open, which gave file_handle a
    # different API from the pysam.TabixFile used for the test user.
    self.file_handle = pysam.TabixFile(tabix_path)
def __init__(self, user_id, file_id, cnf_loc=''):
    """
    Open the BigWig file for a user and keep the handle on the instance.

    Parameters
    ----------
    user_id : str
        Identifier to uniquely locate the users files. Can be set to
        "common" if the files can be shared between users or 'test' for a
        dummy file
    file_id : str
        Identifier of the file, passed to ``get_file_by_id`` of the DM API
        to look up its path. Not used for the 'test' user.
    cnf_loc : str
        (Optional) Passed to ``dmp`` when the file has to be looked up;
        presumably the location of the DM API configuration file.
    """
    if user_id == 'test':
        # The dummy file lives with the test data and is generated on
        # first use if it is missing.
        bigwig_path = os.path.join(
            os.path.dirname(__file__), "../tests/data/sample.bw")
        if not os.path.isfile(bigwig_path):
            GenerateSampleBigWig().main()
    else:
        bigwig_path = dmp(cnf_loc).get_file_by_id(user_id, file_id)['file_path']

    self.file_handle = pyBigWig.open(bigwig_path, 'r')
def test_loading():
    """
    Test the loading of new files into the MongoDB

    One file is registered for every combination of file type, data type
    and compression, each owned by a randomly chosen user. RNA-seq fastq
    entries randomly get a derived bam file that lists the fastq file as
    its source. Finally the number of files per user is printed.
    """
    users = ["adam", "ben", "chris", "denis", "eric"]
    file_types = [
        "fastq", "fa", "fasta", "bam", "bed", "hdf5", "tsv", "wig", "pdb"
    ]
    data_types = ['RNA-seq', 'MNase-Seq', 'ChIP-seq', 'WGBS', 'HiC']
    compressed = [None, 'gzip', 'zip']

    dm_handle = dmp(test=True)

    counter = 0
    for file_type in file_types:
        is_fastq = file_type == 'fastq'
        for data_type in data_types:
            for zipped in compressed:
                owner = random.choice(users)
                # Shared prefix of every path generated in this iteration.
                path_stem = '/tmp/test/' + data_type + '/test_' + str(counter)

                file_id = dm_handle.set_file(
                    owner, path_stem + '.' + file_type, 'file', file_type,
                    64000, None, data_type, 9606, zipped,
                    meta_data={'assembly': 'GCA_0123456789'})
                print(file_id)

                # The coin toss is only drawn for RNA-seq fastq entries, so
                # the sequence of random calls matches the condition order.
                if data_type == 'RNA-seq' and is_fastq and random.choice([0, 1]) == 1:
                    file_id_2 = dm_handle.set_file(
                        owner, path_stem + '.bam', 'file', 'bam', 64000,
                        None, data_type, 9606, None, [file_id],
                        meta_data={
                            'assembly': 'GCA_0123456789',
                            'tool': 'kallisto'
                        })
                    print(file_id_2)

                counter += 1

    for user_id in users:
        results = dm_handle.get_files_by_user(user_id)
        print(user_id, len(results))
def test_files_by_user_rest():
    """
    Check that no entry returned by get_files_by_user exposes 'file_path'
    when the call is made with True as its second argument, for all known
    test users.
    """
    dm_handle = dmp(test=True)

    for user in ("adam", "ben", "chris", "denis", "eric", "test"):
        for entry in dm_handle.get_files_by_user(user, True):
            assert 'file_path' not in entry
def test_files_by_user():
    """
    Check that get_files_by_user returns a list for every known test user,
    printing the number of files found for each.
    """
    dm_handle = dmp(test=True)

    for user in ("adam", "ben", "chris", "denis", "eric", "test"):
        user_files = dm_handle.get_files_by_user(user)
        print(user, len(user_files))
        assert isinstance(user_files, list)
def test_history():
    """
    Check that the history of every bam file of each known test user is
    returned as a list.
    """
    dm_handle = dmp(test=True)

    for user in ("adam", "ben", "chris", "denis", "eric"):
        for bam_entry in dm_handle.get_files_by_file_type(user, 'bam'):
            file_history = dm_handle.get_file_history(user, bam_entry['_id'])
            assert isinstance(file_history, list)
def test_files_by_type():
    """
    Check that get_files_by_file_type returns a list or a dict for ten
    randomly drawn combinations of user and file type.
    """
    users = ["adam", "ben", "chris", "denis", "eric"]
    file_types = ["fastq", "fasta", "bam", "bed", "hdf5", "tsv", "wig", "pdb"]

    dm_handle = dmp(test=True)

    for _ in range(10):
        # Draw the user before the file type to keep the random sequence.
        user = random.choice(users)
        file_type = random.choice(file_types)
        found = dm_handle.get_files_by_file_type(user, file_type)
        assert isinstance(found, (list, dict))
def test_files_by_user():
    """
    Check that removing the first file of a user through the API reduces
    the number of files stored for that user by one.
    """
    user = "******"
    dm_handle = dmp(test=True)

    files_before = dm_handle.get_files_by_user(user)
    assert isinstance(files_before, list)
    count_before = len(files_before)

    dm_handle.remove_file(user, files_before[0]['_id'])

    files_after = dm_handle.get_files_by_user(user)
    assert count_before - 1 == len(files_after)
def __init__(self, user_id, file_id, resolution=None, cnf_loc=''):
    """
    Open the adjacency HDF5 file, generating the sample file for the test
    user if it is missing, and select the dataset for one resolution.

    Parameters
    ----------
    user_id : str
        Identifier to uniquely locate the users files. Can be set to
        "common" if the files can be shared between users or 'test' for
        the sample file
    file_id : str
        Identifier of the file, passed to ``get_file_by_id`` of the DM API
        to look up its path. Also stored on the instance.
    resolution : int
        (Optional) Level of resolution. When omitted the first resolution
        listed in the file is used.
    cnf_loc : str
        (Optional) Passed to ``dmp`` when the file has to be looked up;
        presumably the location of the DM API configuration file.
    """
    # file_id required later on for URL generation
    self.file_id = file_id

    if user_id == 'test':
        hdf5_path = os.path.join(
            os.path.dirname(__file__), "../tests/data/sample_adjacency.hdf5")
        if not os.path.isfile(hdf5_path):
            GenerateSampleAdjacency().main()
    else:
        hdf5_path = dmp(cnf_loc).get_file_by_id(user_id, file_id)["file_path"]

    self.hdf5_handle = h5py.File(hdf5_path, "r")

    # Top-level keys of the file are the available resolutions.
    self.resolutions = [int(key) for key in self.hdf5_handle.keys()]

    selected = self.resolutions[0] if resolution is None else resolution
    self.dset = self.hdf5_handle[str(selected)]
    self.resolution = selected

    self.chr_param = self._calculate_chr_param(
        self.resolutions, self.dset.attrs["chromosomes"])
def test_files_by_id():
    """
    Check that get_file_by_id returns a dict containing 'file_path' for
    each of a fixed set of file identifiers.
    """
    user = "******"
    file_ids = [
        ObjectId(str(hex_id))
        for hex_id in (
            "0123456789ab0123456789aa",
            "0123456789ab0123456789ab",
            "0123456789ab0123456789ac",
            "0123456789ab0123456789ad",
        )
    ]

    dm_handle = dmp(test=True)

    for file_id in file_ids:
        entry = dm_handle.get_file_by_id(user, file_id)
        print("DMP RESULTS:", entry)
        assert isinstance(entry, dict)
        assert 'file_path' in entry
def test_files_by_user():
    """
    Check that a piece of meta data can be added to, and then removed
    from, a pre-existing file within the DM API.
    """
    user = "******"
    dm_handle = dmp(test=True)

    user_files = dm_handle.get_files_by_user(user)
    assert isinstance(user_files, list)

    # Attach the key to the first file of the user.
    first_id = user_files[0]['_id']
    file_id = dm_handle.add_file_metadata(
        user, first_id, 'test', 'An example string')
    with_key = dm_handle.get_file_by_id(user, file_id)
    assert 'test' in with_key['meta_data'].keys()

    # Remove it again and confirm it is gone.
    dm_handle.remove_file_metadata(user, file_id, 'test')
    without_key = dm_handle.get_file_by_id(user, file_id)
    assert 'test' not in without_key['meta_data'].keys()
def test_files_by_file_path_rest():
    """
    Check that looking up the sample BigBed and BigWig files by path, with
    True as the third argument, returns a list with exactly one entry and
    that the entry does not expose 'file_path'.
    """
    user = "******"
    here = os.path.dirname(__file__)
    file_paths = [
        os.path.realpath(os.path.join(here, "../tests/data/sample.bb")),
        os.path.realpath(os.path.join(here, "../tests/data/sample.bw")),
    ]

    dm_handle = dmp(test=True)

    for file_path in file_paths:
        matches = dm_handle.get_file_by_file_path(user, file_path, True)
        assert isinstance(matches, list)
        assert len(matches) == 1
        for match in matches:
            assert 'file_path' not in match
def __init__(self, user_id, file_id, resolution=None, cnf_loc=''):
    """
    Open the coordinates HDF5 file and, when a resolution is given, load
    the groups and JSON attributes stored for that resolution.

    Parameters
    ----------
    user_id : str
        Identifier to uniquely locate the users files. Can be set to
        "common" if the files can be shared between users or 'test' for
        the sample file
    file_id : str
        Identifier of the file, passed to ``get_file_by_id`` of the DM API
        to look up its path. Not used for the 'test' user.
    resolution : int
        (Optional) Level of resolution. When omitted only the file handle
        is opened and none of the per-resolution attributes are set.
    cnf_loc : str
        (Optional) Passed to ``dmp`` when the file has to be looked up;
        presumably the location of the DM API configuration file.
    """
    self.file_handle = None

    # Work out which hdf5 file to open
    if user_id == 'test':
        hdf5_path = os.path.join(
            os.path.dirname(__file__), "../tests/data/sample_coords.hdf5")
        if not os.path.isfile(hdf5_path):
            GenerateSampleCoords().main()
    else:
        hdf5_path = dmp(cnf_loc).get_file_by_id(user_id, file_id)['file_path']

    self.file_handle = h5py.File(hdf5_path, 'r')

    self.resolution = resolution
    if self.resolution is None:
        return

    self.grp = self.file_handle[str(self.resolution)]
    self.meta = self.grp['meta']
    self.mpgrp = self.meta['model_params']
    self.clusters = self.meta['clusters']
    self.centroids = self.meta['centroids']

    # Optional JSON attributes of the data set fall back to empty values.
    attrs = self.grp['data'].attrs
    self.dependencies = (
        json.loads(attrs['dependencies']) if 'dependencies' in attrs else [])
    self.meta_data = (
        json.loads(attrs['TADbit_meta']) if 'TADbit_meta' in attrs else {})
    self.hic_data = (
        json.loads(attrs['hic_data']) if 'hic_data' in attrs else {})
    self.restraints = (
        json.loads(attrs['restraints']) if 'restraints' in attrs else {})
pcs = process_chipseq()
cf = common()

#
# MuG Tool Steps
# --------------
#
# 1. Create data files

# Get the assembly
#genome_fa = cf.getGenomeFromENA(data_dir, species, assembly, False)
# NOTE(review): with the line above commented out, genome_fa is not assigned
# anywhere in this part of the script; confirm it is defined earlier, as
# file_loc and file_bg_loc must be too.

# 2. Register the data with the DMP
from dmp import dmp

da = dmp()

# Files known for the test user before registration. print is called with a
# single argument so the output is the same on Python 2 and Python 3.
print(da.get_files_by_user("test"))

genome_file = da.set_file("test", genome_fa, "fasta", "Assembly", 9606, None)
file_in = da.set_file("test", file_loc, "fasta", "ChIP-seq", 9606, None)
file_bg_in = da.set_file("test", file_bg_loc, "fasta", "ChIP-seq", 9606, None)

# Files known for the test user after registration.
print(da.get_files_by_user("test"))

# 3. Instantiate and launch the App
from basic_modules import WorkflowApp

app = WorkflowApp()
results = app.launch(process_chipseq, [genome_file, file_in, file_bg_in], {})

# Files known for the test user once the workflow has run.
print(da.get_files_by_user("test"))
'resolutions': RESOLUTIONS, 'enzyme_name': ENZYME_NAME, 'windows1': WINDOWS1, 'windows2': WINDOWS2, 'normalized': NORMALIZED, 'hdf5': True, 'expt_name': EXPT_NAME, 'window_type': WINDOW_TYPE } # # MuG Tool Steps # -------------- # # 1. Create data files DM_HANDLER = dmp(test=True) #2. Register the data with the DMP genome_file = DM_HANDLER.set_file("test", GENOME_FA, "fasta", "Assembly", TAXON_ID, meta_data={'assembly': ASSEMBLY}) genome_idx = DM_HANDLER.set_file("test", GENOME_GEM, "gem", "Assembly Index", TAXON_ID, meta_data={'assembly': ASSEMBLY}) fastq_01_file_in = DM_HANDLER.set_file("test",