def test_function_return_primitive():
    print("test_function_return_primitive")
    val = 1
    i = function_return_primitive(val)
    function_return_primitive(i)
    i = compss_wait_on(i)
    if i == val * 2:
        print("- Primitive access from MP: OK")
    else:
        print("- Primitive access from MP: ERROR")

def test_function_return_object():
    print("test_function_return_object")
    val = 1
    o = function_return_object(val)
    o.instance_method()
    o = compss_wait_on(o)
    if o.field == val * 2:
        print("- Object access from MP: OK")
    else:
        print("- Object access from MP: ERROR")

def test_instance_method():
    print("test_instance_method")
    val = 1
    o = MyClass(val)
    o.instance_method()
    o.instance_method_nonmodifier()
    o.instance_method()
    o = compss_wait_on(o)
    if o.field == val * 4:
        print("- Object access from MP: OK")
    else:
        print("- Object access from MP: ERROR")

def test_mp_object_access():
    print("test_mp_object_access")
    val = 1
    o = MyClass(val)
    l = [1, 2, 3, 4]
    dic = {'key1': 'value1', 'key2': 'value2'}
    tup = ('a', 'b', 'c')
    cplx = complex('1+2j')
    function_objects(o, l, dic, tup, cplx, par_func)
    o = compss_wait_on(o)
    if o.field == val * 2:
        print("- Object access from MP: OK")
    else:
        print("- Object access from MP: ERROR")

    o.field = val * 4
    function_objects(o, l, dic, tup, cplx, par_func)

def test_merge_reduce(self):
    from pycompss.api.api import compss_wait_on
    res = merge_reduce(self.methodFunction, self.data)
    res = compss_wait_on(res)
    self.assertEqual(res, sum(self.data))

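# For reference, a minimal sketch of what a merge_reduce helper typically
# looks like in PyCOMPSs codebases (hypothetical here; the real helper used
# by the test above lives elsewhere in the repo). It reduces the list
# pairwise so the partial reductions can themselves run as tasks, and only
# the final future needs a compss_wait_on.
def merge_reduce(function, data):
    data = list(data)
    while len(data) > 1:
        # Each call may return a future; it is fed back into the queue.
        data.append(function(data.pop(0), data.pop(0)))
    return data[0]
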
def testFailedBinaryExitValue(self):
    ev = failedBinary(123)
    ev = compss_wait_on(ev)
    self.assertEqual(ev, 123)  # own exit code for failed execution

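# A hedged sketch of the kind of task the test above exercises: a @binary
# task that does not abort on a non-zero exit code, with the exit code
# captured through the integer return. The script name is an assumption,
# not taken from this file.
from pycompss.api.binary import binary
from pycompss.api.task import task


@binary(binary="./exit_with_code.sh", fail_by_exit_value=False)
@task(returns=int)
def failedBinary(exit_code):
    pass  # the task arguments are forwarded to the binary's command line
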
def run(self, input_files, input_metadata, output_files):  # pylint: disable=too-many-locals,too-many-statements
    """
    The main function to run MACS 2 for peak calling over a given BAM file
    and matching background BAM file.

    Parameters
    ----------
    input_files : dict
        Input bam file locations, where "bam" is the bam data file and
        "bam_bg" is the matching background bam file
    input_metadata : dict
    output_files : dict

    Returns
    -------
    output_files : dict
        Locations of the output files.
    output_metadata : dict
        Matching metadata dict objects
    """
    root_name = input_files['bam'].split("/")
    root_name[-1] = root_name[-1].replace('.bam', '')
    name = root_name[-1]

    # input and output share most metadata
    output_bed_types = {
        'narrow_peak': "bed4+1",
        'summits': "bed6+4",
        'broad_peak': "bed6+3",
        'gapped_peak': "bed12+3"
    }

    command_params = self.get_macs2_params(self.configuration)

    bam_utils_handle = bamUtilsTask()
    bam_utils_handle.bam_index(input_files['bam'],
                               input_files['bam'] + '.bai')
    if 'bam_bg' in input_files:
        bam_utils_handle.bam_index(input_files['bam_bg'],
                                   input_files['bam_bg'] + '.bai')

    chr_list = bam_utils_handle.bam_list_chromosomes(input_files['bam'])
    chr_list = compss_wait_on(chr_list)

    logger.info("MACS2 COMMAND PARAMS: " + ", ".join(command_params))

    for chromosome in chr_list:
        if 'bam_bg' in input_files:
            result = self.macs2_peak_calling(
                name + "." + str(chromosome),
                str(input_files['bam']),
                str(input_files['bam']) + '.bai',
                str(input_files['bam_bg']),
                str(input_files['bam_bg']) + '.bai',
                command_params,
                str(output_files['narrow_peak']) + "." + str(chromosome),
                str(output_files['summits']) + "." + str(chromosome),
                str(output_files['broad_peak']) + "." + str(chromosome),
                str(output_files['gapped_peak']) + "." + str(chromosome),
                chromosome)
        else:
            result = self.macs2_peak_calling_nobgd(
                name + "." + str(chromosome),
                str(input_files['bam']),
                str(input_files['bam']) + '.bai',
                command_params,
                str(output_files['narrow_peak']) + "." + str(chromosome),
                str(output_files['summits']) + "." + str(chromosome),
                str(output_files['broad_peak']) + "." + str(chromosome),
                str(output_files['gapped_peak']) + "." + str(chromosome),
                chromosome)

        if result is False:
            logger.fatal("MACS2: Something went wrong with the peak calling")

    # Merge the results files into single files.
    with open(output_files['narrow_peak'], 'wb') as file_np_handle, \
            open(output_files['summits'], 'wb') as file_s_handle, \
            open(output_files['broad_peak'], 'wb') as file_bp_handle, \
            open(output_files['gapped_peak'], 'wb') as file_gp_handle:
        for chromosome in chr_list:
            np_file_chr = "{}.{}".format(output_files['narrow_peak'],
                                         chromosome)
            s_file_chr = "{}.{}".format(output_files['summits'],
                                        chromosome)
            bp_file_chr = "{}.{}".format(output_files['broad_peak'],
                                         chromosome)
            gp_file_chr = "{}.{}".format(output_files['gapped_peak'],
                                         chromosome)

            if hasattr(sys, '_run_from_cmdl'):
                with open(np_file_chr, 'rb') as file_in_handle:
                    file_np_handle.write(file_in_handle.read())
                with open(s_file_chr, 'rb') as file_in_handle:
                    file_s_handle.write(file_in_handle.read())
                with open(bp_file_chr, 'rb') as file_in_handle:
                    file_bp_handle.write(file_in_handle.read())
                with open(gp_file_chr, 'rb') as file_in_handle:
                    file_gp_handle.write(file_in_handle.read())
            else:
                with compss_open(np_file_chr, 'rb') as file_in_handle:
                    file_np_handle.write(file_in_handle.read())
                with compss_open(s_file_chr, 'rb') as file_in_handle:
                    file_s_handle.write(file_in_handle.read())
                with compss_open(bp_file_chr, 'rb') as file_in_handle:
                    file_bp_handle.write(file_in_handle.read())
                with compss_open(gp_file_chr, 'rb') as file_in_handle:
                    file_gp_handle.write(file_in_handle.read())
                compss_delete_file(np_file_chr)
                compss_delete_file(s_file_chr)
                compss_delete_file(bp_file_chr)
                compss_delete_file(gp_file_chr)

    output_files_created = {}
    output_metadata = {}

    for result_file in output_files:
        if (os.path.isfile(output_files[result_file]) and
                os.path.getsize(output_files[result_file]) > 0):
            output_files_created[result_file] = output_files[result_file]

            sources = [input_metadata["bam"].file_path]
            if 'bam_bg' in input_files:
                sources.append(input_metadata["bam_bg"].file_path)

            output_metadata[result_file] = Metadata(
                data_type="data_chip_seq",
                file_type="BED",
                file_path=output_files[result_file],
                sources=sources,
                taxon_id=input_metadata["bam"].taxon_id,
                meta_data={
                    "assembly": input_metadata["bam"].meta_data["assembly"],
                    "tool": "macs2",
                    "bed_type": output_bed_types[result_file],
                    "parameters": command_params
                })
        elif os.path.isfile(output_files[result_file]):
            # Remove empty result files
            os.remove(output_files[result_file])

    logger.info('MACS2: GENERATED FILES: ' + ', '.join(output_files))

    return (output_files_created, output_metadata)

def fit(self, x, test=None):
    """ Fits a model using training data. Training data is also used to
    check for convergence unless test data is provided.

    Parameters
    ----------
    x : ds-array, shape=(n_users, n_items)
        ds-array where each row is the collection of ratings given by a
        user.
    test : csr_matrix
        Sparse matrix used to check convergence, with users as rows and
        items as columns. If not passed, training data is used to check
        convergence.
    """
    self.converged = False
    self.users = None
    self.items = None

    n_u = x.shape[0]
    n_i = x.shape[1]

    if self.verbose:
        print("Item blocks: %s" % n_i)
        print("User blocks: %s" % n_u)

    if self.random_state:
        np.random.seed(self.random_state)

    users = None
    items = np.random.rand(n_i, self.n_f)

    # Assign average rating as first feature
    # average_ratings = dataset.mean(axis='columns').collect()
    average_ratings = _mean(x)
    items[:, 0] = average_ratings

    rmse, last_rmse = np.inf, np.NaN
    i = 0
    while not self._has_finished(i):
        last_rmse = rmse

        users = self._update(r=x, x=items, axis=0)
        items = self._update(r=x, x=users, axis=1)

        if self.check_convergence:
            _test = x if test is None else test
            rmse = compss_wait_on(self._compute_rmse(_test, users, items))
            self.converged = self._has_converged(last_rmse, rmse)

            if self.verbose:
                test_set = "Train" if test is None else "Test"
                print("%s RMSE: %.3f [%s]" %
                      (test_set, rmse, abs(last_rmse - rmse)))
        i += 1

    self.users = compss_wait_on(users)
    self.items = compss_wait_on(items)

    return self.users, self.items

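# A hedged usage sketch for fit() above, assuming the surrounding class is
# an ALS-style estimator and that dislib's ds.array loader is available;
# shapes and hyper-parameters are illustrative only.
import numpy as np
import dislib as ds

ratings = ds.array(np.random.rand(100, 50), block_size=(25, 25))
model = ALS()                      # hyper-parameters left at defaults
users, items = model.fit(ratings)  # synchronises the factor matrices
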
# Body of the MPI task used below (its decorators and signature are not
# part of this excerpt):
from mpi4py import MPI

rank = MPI.COMM_WORLD.rank
if len(data) == 2:
    return (data[0] + data[1]) * rank
else:
    return -len(data)


if __name__ == "__main__":
    print("Test single element from previous task")
    data = []
    for i in range(4):
        data.append(genData())
    data2 = [10, 10, 10, 10]

    ret = layout_test_with_normal(2, data, data2)
    ret = compss_wait_on(ret)
    if ret[0] == 2 and ret[1] == 12 and ret[2] == 22 and ret[3] == 32:
        print("Test correct.")
    else:
        raise Exception("Incorrect values " + str(ret) +
                        ". Expecting [2,12,22,32]")

    print("Test same with 2 layouts")
    ret = two_layouts_test(2, data, data2)
    ret = compss_wait_on(ret)
    if ret[0] == 2 and ret[1] == 12 and ret[2] == 22 and ret[3] == 32:
        print("Test correct.")
    else:
        raise Exception("Incorrect values " + str(ret) +
                        ". Expecting [2,12,22,32]")

def testArgTask3(self):
    pending1, pending2 = self.argTask()
    result1 = compss_wait_on(pending1)
    result2 = compss_wait_on(pending2)
    self.assertEqual((result1, result2), (0, 12345))

def testJit3(self):
    result = addJit(4, 5)
    result = compss_wait_on(result)
    self.assertEqual(result, 9)

def testNumbaBaseParameter(self):
    result = multiply(2, 3)
    result = compss_wait_on(result)
    self.assertEqual(result, 6)

def testJit2(self):
    result = increment2(1)
    result = compss_wait_on(result)
    self.assertEqual(result, 2)

def testNumbaBase(self):
    result = add(2, 3)
    result = compss_wait_on(result)
    self.assertEqual(result, 5)

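# A hedged sketch of the kind of tasks the Numba tests in this file call;
# PyCOMPSs can compile the user function with Numba when requested through
# the @task decorator. The function bodies are assumptions inferred from
# the asserted results above.
from pycompss.api.task import task


@task(returns=1, numba=True)
def add(x, y):
    return x + y


@task(returns=1, numba=True)
def multiply(x, y):
    return x * y
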
def run(self, input_files, input_metadata, output_files):  # pylint: disable=too-many-locals,too-many-statements
    """
    The main function to align bam files to a genome using Bowtie2

    Parameters
    ----------
    input_files : dict
        "genome" is the genome file location and "loc" is the FASTQ file;
        "fastq2" and "index" may also be provided
    input_metadata : dict
    output_files : dict

    Returns
    -------
    output_files : dict
        Location of the output bam file
    output_metadata : dict
        Matching metadata
    """
    sources = [input_files["genome"]]

    fqs = fastq_splitter()

    fastq1 = input_files["loc"]
    sources.append(input_files["loc"])

    fastq_file_gz = str(fastq1 + ".tar.gz")

    if "fastq2" in input_files:
        fastq2 = input_files["fastq2"]
        sources.append(input_files["fastq2"])
        fastq_file_list = fqs.paired_splitter(fastq1, fastq2, fastq_file_gz)
    else:
        fastq_file_list = fqs.single_splitter(fastq1, fastq_file_gz)

    # Required to prevent iterating over the future objects
    fastq_file_list = compss_wait_on(fastq_file_list)
    if not fastq_file_list:
        logger.fatal("FASTQ SPLITTER: run failed")
        return {}, {}

    if not hasattr(sys, '_run_from_cmdl'):
        logger.info("Getting the tar file")
        with compss_open(fastq_file_gz, "rb") as f_in:
            with open(fastq_file_gz, "wb") as f_out:
                f_out.write(f_in.read())

    gz_data_path = fastq_file_gz.split("/")
    gz_data_path = "/".join(gz_data_path[:-1])

    try:
        tar = tarfile.open(fastq_file_gz)
        tar.extractall(path=gz_data_path)
        tar.close()
    except tarfile.TarError:
        logger.fatal("Split FASTQ files: Malformed tar file")
        return {}, {}

    # input and output share most metadata
    output_metadata = {}

    output_bam_file = output_files["output"]

    logger.info("BOWTIE2 ALIGNER: Aligning sequence reads to the genome")

    output_bam_list = []
    for fastq_file_pair in fastq_file_list:
        if "fastq2" in input_files:
            tmp_fq1 = gz_data_path + "/tmp/" + fastq_file_pair[0]
            tmp_fq2 = gz_data_path + "/tmp/" + fastq_file_pair[1]
            output_bam_file_tmp = tmp_fq1 + ".bam"
            output_bam_list.append(output_bam_file_tmp)

            self.bowtie2_aligner_paired(
                str(input_files["genome"]), tmp_fq1, tmp_fq2,
                output_bam_file_tmp, str(input_files["index"]),
                self.get_aln_params(self.configuration, True))
        else:
            tmp_fq = gz_data_path + "/tmp/" + fastq_file_pair[0]
            output_bam_file_tmp = tmp_fq + ".bam"
            output_bam_list.append(output_bam_file_tmp)

            logger.info("BOWTIE2 ALN FILES: " + tmp_fq)
            self.bowtie2_aligner_single(
                str(input_files["genome"]), tmp_fq, output_bam_file_tmp,
                str(input_files["index"]),
                self.get_aln_params(self.configuration))

    bam_handle = bamUtilsTask()

    logger.info("Merging bam files")
    bam_handle.bam_merge(output_bam_list)

    logger.info("Sorting merged bam file")
    bam_handle.bam_sort(output_bam_list[0])

    logger.info("Copying bam file into the output file")
    bam_handle.bam_copy(output_bam_list[0], output_bam_file)

    logger.info("BOWTIE2 ALIGNER: Alignments complete")

    output_metadata = {
        "bam": Metadata(
            data_type=input_metadata['loc'].data_type,
            file_type="BAM",
            file_path=output_files["output"],
            sources=[
                input_metadata["genome"].file_path,
                input_metadata['loc'].file_path
            ],
            taxon_id=input_metadata["genome"].taxon_id,
            meta_data={
                "assembly": input_metadata["genome"].meta_data["assembly"],
                "tool": "bowtie_aligner"
            })
    }

    return ({"bam": output_files["output"]}, output_metadata)

def testPythonVersion(self):
    self.assertEqual(sys.version_info[0], self.testing_version)
    worker_version = workerInterpreter()
    worker_version = compss_wait_on(worker_version)
    self.assertEqual(worker_version, self.testing_version)

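# A hedged sketch of workerInterpreter as the test above implies: a task
# that reports the major Python version of the interpreter running on the
# worker, so it can be compared against the master's version.
from pycompss.api.task import task


@task(returns=int)
def workerInterpreter():
    import sys
    return sys.version_info[0]
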
def testKwargsDictUnrollingDefaults(self):
    z = {'a': 10, 'b': 20, 'c': 30}
    pending1, pending2 = self.taskUnrollDictWithDefaults(**z)
    result1 = compss_wait_on(pending1)
    result2 = compss_wait_on(pending2)
    self.assertEqual((result1, result2), (30, {'c': 30}))

def testKwargsDictUnrollingDefaultsControl(self):
    pending1, pending2 = self.taskUnrollDictWithDefaults()
    result1 = compss_wait_on(pending1)
    result2 = compss_wait_on(pending2)
    self.assertEqual((result1, result2), (3, {}))

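# A hedged sketch of taskUnrollDictWithDefaults, inferred from the two
# assertions above: the defaults a=1, b=2 give (3, {}), while **z overrides
# them and leaves the extra key in **kwargs.
from pycompss.api.task import task


@task(returns=2)
def taskUnrollDictWithDefaults(a=1, b=2, **kwargs):
    # Declared as a method in the test class (hence self.…); shown here as
    # a free function for brevity.
    return a + b, kwargs
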
def testFailedBinaryExitValue(self):
    ev = exit_with_code(19)
    ev = compss_wait_on(ev)
    self.assertEqual(ev, 19)  # own exit code for failed execution

def main(num_blocks, elems_per_block, check_result, seed, use_storage):
    # Generate the dataset in a distributed manner,
    # i.e.: avoid having the master hold a whole matrix
    A, B, C = [], [], []
    for i in range(num_blocks):
        for l in [A, B, C]:
            l.append([])
        # Keep track of blockId to initialize with different random seeds
        bid = 0
        for j in range(num_blocks):
            for l in [A, B]:
                l[-1].append(generate_block(elems_per_block,
                                            num_blocks,
                                            seed=seed + bid,
                                            psco=True,
                                            use_storage=use_storage))
                bid += 1
            C[-1].append(generate_block(elems_per_block,
                                        num_blocks,
                                        psco=False,
                                        set_to_zero=True,
                                        use_storage=use_storage))
    dot(A, B, C, True)

    # Persist the result in a distributed manner (i.e.: exploit data
    # locality & avoid memory flooding)
    for i in range(num_blocks):
        for j in range(num_blocks):
            if use_storage:
                persist_result(C[i][j])
            # If we are not going to check the result, we can safely
            # delete the Cij intermediate matrices
            if not check_result:
                from pycompss.api.api import compss_delete_object as del_obj
                del_obj(C[i][j])

    # Check that we get the same result when multiplying sequentially (no
    # tasks). Note that this implies having the whole A and B matrices in
    # the master, so it is advisable to set --check_result only with small
    # matrices. Explicit correctness (i.e.: that an actual dot product is
    # performed) must be checked manually.
    if check_result:
        from pycompss.api.api import compss_wait_on
        for i in range(num_blocks):
            for j in range(num_blocks):
                A[i][j] = compss_wait_on(A[i][j])
                B[i][j] = compss_wait_on(B[i][j])
        for i in range(num_blocks):
            for j in range(num_blocks):
                Cij = compss_wait_on(C[i][j])
                Dij = generate_block(elems_per_block,
                                     num_blocks,
                                     psco=False,
                                     set_to_zero=True)
                Dij = compss_wait_on(Dij)
                for k in range(num_blocks):
                    Dij += np.dot(A[i][k].block, B[k][j].block)
                if not np.allclose(Cij, Dij):
                    print('Block %d-%d gives different products!' % (i, j))
                    return
        print('Distributed and sequential results coincide!')

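# A hedged sketch of the blocked dot() used above, in the style of the
# standard COMPSs matmul examples: every C block accumulates the products
# of a row of A blocks and a column of B blocks. multiply() is assumed to
# be the @task performing Cij += Aik * Bkj; the barrier flag is inferred
# from the call dot(A, B, C, True).
def dot(A, B, C, set_barrier=False):
    n = len(A)
    for i in range(n):
        for j in range(n):
            for k in range(n):
                multiply(A[i][k], B[k][j], C[i][j])
    if set_barrier:
        from pycompss.api.api import compss_barrier
        compss_barrier()
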
def testNJit2(self):
    result = decrement2(1)
    result = compss_wait_on(result)
    self.assertEqual(result, 0)

def testArgTask2(self):
    pending1, pending2 = self.argTask(1, 2, 3, 4)
    result1 = compss_wait_on(pending1)
    result2 = compss_wait_on(pending2)
    self.assertEqual((result1, result2), (10, 1))

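# A hedged sketch of argTask, inferred from testArgTask2 and testArgTask3
# above: it returns the sum of its positional arguments, plus either the
# first argument or a 12345 sentinel when called with no arguments.
from pycompss.api.task import task


@task(returns=2)
def argTask(*args):
    # Declared as a method in the test class (hence self.argTask(...));
    # shown here as a free function for brevity.
    return sum(args), args[0] if args else 12345
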
def testNJit2(self):
    result = subtractNjit(5, 4)
    result = compss_wait_on(result)
    self.assertEqual(result, 1)

def testGeneratedJit2(self):
    result = is_missing2(5)
    result = compss_wait_on(result)
    self.assertEqual(result, False)

def run(self, input_files, input_metadata, output_files):
    """
    The main function to run chicago for peak calling. The input files are
    .chinput and are transformed from BAM files using bam2chicago.sh. The
    input can be a single .chinput file or comma separated files from more
    than one biological replicate. Technical replicates should be pooled
    into one .chinput.

    Parameters
    ----------
    input_files : dict
        list of .chinput files, or str with a single .chinput file
    input_metadata : dict
    output_files : dict
        dict with the output path

    Returns
    -------
    output_files : dict
        List of locations for the output files
    output_metadata : dict
        List of matching metadata dict objects
    """
    # Check if the output directory exists; otherwise create it
    output_dir = os.path.split(output_files["output"])[0]
    if not os.path.exists(output_dir):
        logger.info("creating output directory: " + output_dir)
        os.makedirs(output_dir)

    command_params = self.get_chicago_params(self.configuration)

    logger.info("Chicago command parameters " + " ".join(command_params))

    if isinstance(input_files["chinput"], list):
        chinput_folder = os.path.split(
            input_files["chinput"][0])[0] + "/chinput"
        if os.path.isdir(chinput_folder) is False:
            os.mkdir(chinput_folder)
        for chinput_fl in input_files["chinput"]:
            copyfile(chinput_fl,
                     chinput_folder + "/" + os.path.split(chinput_fl)[1])
        common.tar_folder(chinput_folder,
                          chinput_folder + ".tar",
                          os.path.split(chinput_folder)[1])
        final_chinput = chinput_folder + ".tar"
    else:
        final_chinput = input_files["chinput"]

    results = self.chicago(
        final_chinput,
        self.configuration["chicago_out_prefix"],
        output_files["output"],
        command_params,
        input_files["setting_file"],
        input_files["rmap_chicago"],
        input_files["baitmap_chicago"],
        input_files["nbpb_chicago"],
        input_files["npb_chicago"],
        input_files["poe_chicago"],
    )

    results = compss_wait_on(results)

    output_metadata = {
        "output": Metadata(
            data_type="chicago_CHIC",
            file_type="TAR",
            file_path=output_files["output"],
            sources=[
                input_metadata["chinput"].file_path,
            ],
            taxon_id=input_metadata["chinput"].taxon_id,
            meta_data={"tool": "run_chicago"})
    }

    return output_files, output_metadata

def testVectorize(self):
    matrix = np.arange(6)
    result = vectorized_add(matrix, matrix)
    result = compss_wait_on(result)
    self.assertEqual(result.tolist(), [0.0, 2.0, 4.0, 6.0, 8.0, 10.0])

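# A hedged sketch of vectorized_add: a Numba-vectorized ufunc wrapped as a
# PyCOMPSs task (stacking the Numba decorator under @task follows the
# pattern shown in the PyCOMPSs documentation; the explicit float64 return
# type matches the floats asserted above).
from numba import vectorize
from pycompss.api.task import task


@task(returns=1)
@vectorize(['float64(int64, int64)'])
def vectorized_add(a, b):
    return a + b
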
def main():
    import errno
    import os
    import sys
    import time
    from functools import reduce

    from pycompss.api.api import barrier, compss_wait_on

    # usage
    if len(sys.argv) != 8:
        print("Usage: {} BWA_DB_FILE CONTIG_FILE REFERENCE_FILE "
              "REFERENCE_INDEX_FILE INPUT_DIR WORK_DIR NUM_PROCESSES\n\n"
              "Program name must be called with an absolute path "
              "(starting with '/').".format(sys.argv[0]))
        return 1

    # find program directory and basenames
    cmd_dir = os.path.dirname(sys.argv[0])
    if cmd_dir == "" or cmd_dir[0] != '/':
        print("Program must be called with an absolute path "
              "(starting with '/')")
        return 1
    prog_basename = os.path.basename(os.path.splitext(sys.argv[0])[0])

    # read inputs
    bwa_db_file = sys.argv[1]
    contig_file = sys.argv[2]
    ref_file = sys.argv[3]
    ref_idx_file = sys.argv[4]
    in_dir_prefix = sys.argv[5]
    work_dir = sys.argv[6]
    num_processes = int(sys.argv[7])

    # setup directories
    in_dirs = [in_dir_prefix + '/' + str(x) for x in range(num_processes)]
    out_dir = "{}/{}_OUT".format(work_dir, prog_basename)
    try:
        os.makedirs(out_dir, mode=0o700)
    except OSError as e:
        if e.errno != errno.EEXIST:
            print("Failed to create Directory[{}].\n".format(out_dir))
            raise

    start_time = time.time()

    # mapping & merge
    inputs = []
    for in_dir in in_dirs:
        exts = set(
            [os.path.splitext(file1)[1] for file1 in os.listdir(in_dir)])
        for ext in exts:
            elem = [
                in_dir + '/' + f for f in os.listdir(in_dir)
                if f.endswith(ext)
            ]
            elem.sort()
            inputs.append(elem)
    # inputs = [[in_dir+'part_1.'+i, in_dir+'part_2.'+i] for i in range(num_processes)]
    # ~ [[part_1.0, part_2.0], [part_1.1, part_2.1], ...]
    # print("Inputs: " + str(inputs))  # dbg

    contigs = reduce(
        lambda e1, e2: mapping_merge(e1, cmd_dir, bwa_db_file, contig_file,
                                     e2),
        inputs, {})

    # print("before compss_wait_on")  # dbg
    contigs = compss_wait_on(contigs)
    # print("after compss_wait_on")  # dbg
    # with open('output.dict', 'w') as f:  # dbg
    #     f.write(str(contigs))  # dbg
    # buckets = split(contigs)

    # rm_dup & analyze
    tar_file = init_tar(out_dir)
    reduce(
        lambda tar_file1, contig_sam: rmdup_analyze_tar(
            cmd_dir, ref_idx_file, ref_file, contig_sam,
            contigs[contig_sam], tar_file1),
        contigs, tar_file)

    # print("before barrier")  # dbg
    barrier()
    # print("after barrier")  # dbg

    print("NGSA-mini-py with {} processes. Elapsed Time {} (s)".format(
        num_processes, time.time() - start_time))

def testGuvectorize(self):
    matrix = np.arange(5)
    result = guvectorized_add(matrix, 2)
    result = compss_wait_on(result)
    self.assertEqual(result.tolist(), [2, 3, 4, 5, 6])

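# A hedged sketch of guvectorized_add: a Numba generalized ufunc wrapped
# as a task, again following the decorator-stacking pattern from the
# PyCOMPSs docs. The layout '(n),()->(n)' adds a scalar to each element,
# which matches the assertion above.
from numba import guvectorize, int64
from pycompss.api.task import task


@task(returns=1)
@guvectorize([(int64[:], int64, int64[:])], '(n),()->(n)')
def guvectorized_add(x, y, res):
    for i in range(x.shape[0]):
        res[i] = x[i] + y
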
def testMasterGenerationIn(self):
    matrix = [np.random.rand(5) for _ in range(10)]
    fifth_row = compss_wait_on(select_element(matrix, 4))
    self.assertTrue(np.allclose(matrix[4], fifth_row))

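# A hedged sketch of select_element as the test above suggests: a task
# that takes the master-generated list of rows as a COLLECTION_IN
# parameter and returns the requested element.
from pycompss.api.task import task
from pycompss.api.parameter import COLLECTION_IN


@task(returns=1, matrix=COLLECTION_IN)
def select_element(matrix, i):
    return matrix[i]
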
def testCfunc(self):
    result = integrand(20)
    result = compss_wait_on(result)
    self.assertEqual(result, 5.152884056096394e-12)

def testReturn(self):
    ev = myReturn()
    ev = compss_wait_on(ev)
    self.assertEqual(ev, 0)

def testExternalFunc(self):
    result = external(8)
    result = compss_wait_on(result)
    self.assertEqual(result, 4)

ExactExpectedValueQoI = exact_execution_task(
    local_parameters["solver_settings"]["model_import_settings"]
    ["input_filename"].GetString() + ".mdpa", parameter_file_name)
for instance in range(0, number_samples):
    Qlist.append(execution_task(
        local_parameters["solver_settings"]["model_import_settings"]
        ["input_filename"].GetString() + ".mdpa", parameter_file_name))

'''Compute mean, second moment and sample variance'''
MC_mean = 0.0
MC_second_moment = 0.0
for i in range(0, number_samples):
    nsam = i + 1
    MC_mean, MC_second_moment, MC_variance = mc.update_onepass_M_VAR(
        Qlist[i], MC_mean, MC_second_moment, nsam)

'''Evaluation of the relative error between the computed mean value and
the expected value of the QoI'''
relative_error = compare_mean(MC_mean, ExactExpectedValueQoI)
# print("Values QoI:", Qlist)
MC_mean = compss_wait_on(MC_mean)
ExactExpectedValueQoI = compss_wait_on(ExactExpectedValueQoI)
relative_error = compss_wait_on(relative_error)
print("\nMC mean = ", MC_mean, "exact mean = ", ExactExpectedValueQoI)
print("relative error: ", relative_error)

'''The part below evaluates the relative L2 error between the numerical
solution SOLUTION(x,y,sample) and the analytical solution, which also
depends on sample. The analytical solution is available in the case
FORCING = sample * -432.0 * (coord_x**2 + coord_y**2 - coord_x - coord_y)'''
# model = KratosMultiphysics.Model()
# sample = 1.0
# simulation = MonteCarloAnalysis(model, local_parameters, sample)
# simulation.Run()
# KratosMultiphysics.CalculateNodalAreaProcess(
#     simulation._GetSolver().main_model_part, 2).Execute()
# error = 0.0
# L2norm_analyticalsolution = 0.0

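# For reference, a hedged sketch of the one-pass update used above: this
# is Welford's algorithm for a running mean and variance; the actual
# mc.update_onepass_M_VAR may differ in its interface details.
def update_onepass_M_VAR(sample, old_mean, old_m2, nsamples):
    delta = sample - old_mean
    new_mean = old_mean + delta / nsamples
    new_m2 = old_m2 + delta * (sample - new_mean)
    variance = new_m2 / (nsamples - 1) if nsamples > 1 else 0.0
    return new_mean, new_m2, variance
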
def testExternalFuncConstrained(self):
    result = externalc(20)
    result = compss_wait_on(result)
    self.assertEqual(result, 5)

# send out tasks
cc_original = zeros((num_ccs, 2 * maxlag + 1))
cc_surrs = zeros((num_ccs, 2 * maxlag + 1, 2))
for frag in range(num_frags):
    start_idx = end_idx
    end_idx = end_idx + step
    if remainder > 0:
        end_idx += 1
        remainder -= 1
    print(start_idx, " -> ", end_idx - 1)
    print("Got", (end_idx - start_idx), "ccs")
    result = cc_surrogate_range(fspikes, start_idx, end_idx, seed,
                                num_neurons, num_surrs, num_bins, maxlag)
    gather(result, cc_original, cc_surrs, start_idx, end_idx)
    seed = seed + delta
print("submitted all tasks")

# save results (binary mode is required for pickle)
f = open('./result_cc_originals.dat', 'wb')
cc_original = compss_wait_on(cc_original)
# print("Originals(", start_idx, "-", end_idx, "):", cc_original[start_idx:end_idx, :])
pickle.dump(cc_original, f)
f.close()
f = open('./result_cc_surrogates_conf.dat', 'wb')
cc_surrs = compss_wait_on(cc_surrs)
# print("Surrogates(", start_idx, "-", end_idx, "):", cc_surrs[start_idx:end_idx, :, :])
pickle.dump(cc_surrs, f)
f.close()

def run(self, input_files, input_metadata, output_files):
    """
    The main function to run the compute_metrics tool

    Parameters
    ----------
    input_files : dict
        List of input files - in this case there are no input files
        required
    input_metadata : dict
        Matching metadata for each of the files, plus any additional data
    output_files : dict
        List of the output files that are to be generated

    Returns
    -------
    output_files : dict
        List of files with a single entry.
    output_metadata : dict
        List of matching metadata for the returned files
    """
    project_path = self.configuration.get('project', '.')
    participant_id = self.configuration['participant_id']

    metrics_path = output_files.get("metrics")
    if metrics_path is None:
        metrics_path = os.path.join(project_path, participant_id + '.json')
    metrics_path = os.path.abspath(metrics_path)
    output_files['metrics'] = metrics_path

    tar_view_path = output_files.get("tar_view")
    if tar_view_path is None:
        tar_view_path = os.path.join(project_path,
                                     participant_id + '.tar.gz')
    tar_view_path = os.path.abspath(tar_view_path)
    output_files['tar_view'] = tar_view_path

    results = self.validate_and_assess(
        os.path.abspath(input_files["genes"]),
        os.path.abspath(input_files['metrics_ref_datasets']),
        os.path.abspath(input_files['assessment_datasets']),
        os.path.abspath(input_files['public_ref']),
        metrics_path,
        tar_view_path
    )

    results = compss_wait_on(results)

    if results is False:
        logger.fatal("TCGA CD pipeline failed. See logs")
        raise Exception("TCGA CD pipeline failed. See logs")

    # BEWARE: order DOES matter when one output depends on another
    output_metadata = {
        "metrics": Metadata(
            # These are already known by the platform, so they could be
            # omitted for now
            data_type="metrics",
            file_type="TXT",
            file_path=metrics_path,
            # Reference and golden data set paths should also be here
            sources=[input_metadata["genes"].file_path],
            meta_data={
                "tool": "TCGA_CD"
            }
        ),
        "tar_view": Metadata(
            data_type="tool_statistics",
            file_type="TAR",
            file_path=tar_view_path,
            sources=[metrics_path],
            meta_data={
                "tool": "TCGA_CD"
            }
        ),
    }

    return (output_files, output_metadata)