def test_creates_contrasts_correctly_with_no_contrast_file_given(self):
    """
    With contrast_file set to None, the builder is expected to end up with
    every pairwise contrast between the sample conditions X, Y and Z.
    """
    # one sample per condition
    samples = [
        Sample(name, condition)
        for name, condition in (('A', 'X'), ('B', 'Y'), ('C', 'Z'))
    ]

    builder = PipelineBuilder('')

    params = Params()
    params.add(skip_analysis = False)
    params.add(contrast_file = None)

    builder.builder_params = params
    builder.all_samples = samples

    # call the name-mangled private method directly
    builder._PipelineBuilder__check_contrast_file()

    all_pairs = {('X', 'Y'), ('X', 'Z'), ('Y', 'Z')}
    self.assertTrue(set_of_tuples_is_equivalent(builder.contrasts, all_pairs))
def test_group_countfiles_raises_exception_if_missing_type(self):
    """
    Grouping countfiles must fail when a sample lacks one countfile 'type'.

    Each sample may have several BAM files (e.g. primary alignments,
    deduplicated, etc.) and a countfile is generated for each of them.  When
    a count matrix is assembled, the files of a particular 'type' (e.g. those
    coming from deduplicated BAM files) are grouped together.

    Here sample B is missing the countfile based on the primary BAM file
    (B.primary.counts), so get_countfile_groupings is expected to raise
    CountfileQuantityException.
    """
    p = Params()
    p.add(feature_counts_output_dir='/path/to/final/featureCounts')

    s1 = Sample('A', 'X')
    s1.countfiles = [
        '/path/to/final/featureCounts/A.counts',
        '/path/to/final/featureCounts/A.primary.counts',
        '/path/to/final/featureCounts/A.primary.dedup.counts',
    ]

    # sample B deliberately has no '.primary.counts' file
    s2 = Sample('B', 'Y')
    s2.countfiles = [
        '/path/to/final/featureCounts/B.counts',
        '/path/to/final/featureCounts/B.primary.dedup.counts',
    ]

    s3 = Sample('C', 'Z')
    s3.countfiles = [
        '/path/to/final/featureCounts/C.counts',
        '/path/to/final/featureCounts/C.primary.counts',
        '/path/to/final/featureCounts/C.primary.dedup.counts',
    ]

    project = Project()
    project.add_parameters(p)
    project.add_samples([s1, s2, s3])

    # stand-in for the glob helper: successive calls return one list per
    # countfile type, the '.primary.counts' list having no entry for B
    mock_case_insensitive_glob = mock.Mock()
    mock_case_insensitive_glob.side_effect = [
        [
            '/path/to/final/featureCounts/A.counts',
            '/path/to/final/featureCounts/B.counts',
            '/path/to/final/featureCounts/C.counts',
        ],
        [
            '/path/to/final/featureCounts/A.primary.counts',
            '/path/to/final/featureCounts/C.primary.counts',
        ],
        [
            '/path/to/final/featureCounts/A.primary.dedup.counts',
            '/path/to/final/featureCounts/B.primary.dedup.counts',
            '/path/to/final/featureCounts/C.primary.dedup.counts',
        ],
    ]

    with self.assertRaises(self.module.CountfileQuantityException):
        self.module.get_countfile_groupings(project, mock_case_insensitive_glob)
def test_group_countfiles(self):
    """
    Check that countfiles are grouped by 'type' across samples.

    Every sample carries one countfile per BAM 'type' (plain, primary,
    primary + deduplicated).  get_countfile_groupings is expected to return
    one list per type, each holding that type's file for samples A, B and C
    in that order.
    """
    count_dir = '/path/to/final/featureCounts'
    suffixes = ['.counts', '.primary.counts', '.primary.dedup.counts']
    sample_names = ['A', 'B', 'C']

    p = Params()
    cp = Params()
    cp.add(feature_counts_output_dir=count_dir)

    samples = []
    for name, condition in zip(sample_names, ['X', 'Y', 'Z']):
        sample = Sample(name, condition)
        sample.countfiles = [count_dir + '/' + name + suffix for suffix in suffixes]
        samples.append(sample)

    project = Project()
    project.add_parameters(p)
    project.add_samples(samples)

    result = self.module.get_countfile_groupings(project, cp)

    # one group per suffix, samples in A, B, C order within each group
    expected_result = [
        [count_dir + '/' + name + suffix for name in sample_names]
        for suffix in suffixes
    ]
    self.assertEqual(result, expected_result)
def test_raises_exception_if_non_sensible_contrast_specified(self, mock_parse):
    """
    A contrast that names a condition without any samples must be rejected
    with ContrastSpecificationException.

    mock_parse is the injected mock whose return value stands in for the
    parsed contents of the contrast file.
    """
    # samples cover conditions X, Y and Z only
    samples = [
        Sample(name, condition)
        for name, condition in (('A', 'X'), ('B', 'Y'), ('C', 'Z'))
    ]

    builder = PipelineBuilder('')

    params = Params()
    params.add(skip_analysis = False)
    params.add(contrast_file = 'contrast.txt')

    builder.builder_params = params
    builder.all_samples = samples

    # note the contrast of Y against A: there are no samples from condition A.
    mock_parse.return_value = {('X', 'Y'), ('X', 'Z'), ('Y', 'A')}

    with self.assertRaises(ContrastSpecificationException):
        builder._PipelineBuilder__check_contrast_file()
def get_one_thread_samples(thread, max_n_agents, n_prev_sents, test=False):
    """
    Cut one thread into Sample objects at utterances that have no addressee.

    Utterances whose addressee id is not '-' are buffered as
    (time, speaker id, addressee id, original sentence) tuples.  When an
    utterance with addressee id '-' is reached, a buffer holding more than
    two utterances is cut down to its first nine entries and wrapped in a
    Sample.  Utterances still buffered when the thread ends are not emitted.

    :param thread: iterable of utterance records indexed as
        [time, speaker id, addressee id, response..., label]
    :param max_n_agents: not used by this function
    :param n_prev_sents: not used by this function
    :param test: not used by this function
    :return: list of Sample objects
    """
    samples = []
    buffered = []

    for utterance in thread:
        time, spk_id, adr_id = utterance[0], utterance[1], utterance[2]
        label = utterance[-1]

        if adr_id != '-':
            responses = utterance[3:-1]
            original_sent = get_original_sent(responses, label)
            buffered.append((time, spk_id, adr_id, original_sent))
            continue

        # utterance without an addressee: close the current buffer
        if len(buffered) > 2:  # TODO: hyperparameter
            samples.append(Sample(buffered[:9]))
        # NOTE(review): the source was flattened onto one line; the buffer is
        # assumed to be reset on every '-' utterance, not only after a Sample
        # was emitted -- confirm against the original layout.
        buffered = []

    return samples
def test_generate_figures(self):
    """
    Full-scale mock-up rather than a conventional unit test: generate_figures
    is run for real (the original author notes it creates an output pdf),
    with only the STAR log parsing, the BAM counting and the coverage
    calculation replaced by mocks.
    """
    project = Project()
    project.parameters = {
        'aligner': 'star',
        'skip_align': False,
        'sample_dir_prefix': 'Sample_',
        'alignment_dir': 'aln',
        'project_directory': 'foo',
        'chromosomes': ['chr1', 'chr2', 'chrM'],
    }

    report_cfg = os.path.join(root, 'components', 'pdf_report', 'report.cfg')
    component_params = cp.read_config(report_cfg, 'COMPONENT_SPECIFIC')
    extra_params = cp.read_config(report_cfg, 'STAR')

    coverage_pattern = os.path.join(
        'test_data', '*' + component_params.get('coverage_file_suffix'))

    # one mock sample per coverage file; its id is the file name up to the first dot
    project.samples = [
        Sample(os.path.basename(coverage_file).split('.')[0], 'X')
        for coverage_file in glob.glob(coverage_pattern)
    ]

    output_dir = os.path.join(
        os.path.abspath(os.path.dirname(__file__)),
        test_output_dir,
        component_params.get('report_output_dir'))
    component_params['report_output_dir'] = output_dir
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    # link the test files so they 'appear' in the correct location:
    for coverage_file in glob.glob(coverage_pattern):
        os.symlink(
            os.path.abspath(coverage_file),
            os.path.join(output_dir, os.path.basename(coverage_file)))

    mock_log_data = mock_log_data_structure(project, extra_params)

    self.module.star_methods.process_star_logs = mock.Mock(
        return_value=mock_log_data)
    self.module.get_bam_counts = mock.Mock(
        return_value=mock_bam_counts(mock_log_data.keys()))
    self.module.calculate_coverage_data = mock.Mock(return_value=None)

    self.module.generate_figures(project, component_params, extra_params)
def test_system_calls_single_end_experiment(self):
    """
    For a single-end project, execute_counting is expected to issue one
    featureCounts shell command per BAM file (via subprocess.check_call,
    which is mocked) and to record the resulting countfile paths on the
    sample.
    """
    self.module.subprocess = mock.Mock()

    bam_dir = '/path/to/bamdir'
    count_dir = '/path/to/final/featureCounts'
    stems = ['A', 'A.primary', 'A.primary.dedup']

    p = Params()
    p.add(gtf='/path/to/GTF/mock.gtf')
    p.add(feature_counts='/path/to/bin/featureCounts')
    p.add(feature_counts_file_extension='counts')
    p.add(feature_counts_output_dir=count_dir)
    p.add(paired_alignment=False)

    s1 = Sample('A', 'X')
    s1.bamfiles = ['%s/%s.bam' % (bam_dir, stem) for stem in stems]

    project = Project()
    project.add_parameters(p)
    project.add_samples([s1])

    # every BAM file is reported as present
    is_file = mock.MagicMock(side_effect=[True, True, True])
    with mock.patch.object(self.module.os.path, 'isfile', is_file):
        self.module.execute_counting(project, util_methods)

    command = ('/path/to/bin/featureCounts -a /path/to/GTF/mock.gtf'
               ' -t exon -g gene_name -o %s/%s.counts %s/%s.bam')
    calls = [
        mock.call(command % (count_dir, stem, bam_dir, stem), shell=True)
        for stem in stems
    ]
    self.module.subprocess.check_call.assert_has_calls(calls)

    # check that the sample contains paths to the new count files in the correct locations:
    expected_files = [
        os.path.join('/path/to/final/featureCounts',
                     re.sub('bam', 'counts', os.path.basename(bam)))
        for bam in s1.bamfiles
    ]
    self.assertEqual(s1.countfiles, expected_files)
def test_bad_bamfile_path_raises_exception(self):
    """
    execute_counting must raise MissingBamFileException when a BAM file is
    reported missing.  os.path.isfile is patched to answer True four times
    and then False.
    """
    self.module.subprocess = mock.Mock()

    p = Params()
    p.add(gtf='/path/to/GTF/mock.gtf')
    p.add(feature_counts='/path/to/bin/featureCounts')
    p.add(feature_counts_file_extension='counts')
    p.add(feature_counts_output_dir='/path/to/final/featureCounts')
    p.add(paired_alignment=False)

    good_sample = Sample('A', 'X')
    good_sample.bamfiles = [
        '/path/to/bamdir/A.bam',
        '/path/to/bamdir/A.primary.bam',
        '/path/to/bamdir/A.primary.dedup.bam',
    ]

    # the second BAM file of this sample lives at a bad path
    bad_sample = Sample('B', 'X')
    bad_sample.bamfiles = ['/path/to/bamdir/B.bam', '/bad/path/B.sort.bam']

    project = Project()
    project.add_parameters(p)
    project.add_samples([good_sample, bad_sample])

    is_file = mock.MagicMock(side_effect=[True, True, True, True, False])
    with mock.patch.object(self.module.os.path, 'isfile', is_file):
        with self.assertRaises(self.module.MissingBamFileException):
            self.module.execute_counting(project, util_methods)
def test_sample_specific_template_injected_correctly_for_paired_alignment(self):
    """
    fill_out_sample_specific_portion must replace every %PLACEHOLDER% in the
    template with the matching value of a paired-end sample (PAIRED becomes 1).
    """
    sample_template = (
        'FASTQFILEA=%FASTQFILEA%\n'
        'FASTQFILEB=%FASTQFILEB%\n'
        'SAMPLE_NAME=%SAMPLE_NAME%\n'
        'PAIRED=%PAIRED%\n'
        'OUTDIR=%OUTDIR%\n'
        'FCID=%FCID%\n'
        'LANE=%LANE%\n'
        'INDEX=%INDEX%\n'
    )
    expected_result = (
        'FASTQFILEA=/path/to/ABC_r1_001.fastq.gz\n'
        'FASTQFILEB=/path/to/ABC_r2_001.fastq.gz\n'
        'SAMPLE_NAME=ABC\n'
        'PAIRED=1\n'
        'OUTDIR=/path/to/aln\n'
        'FCID=DEFAULT\n'
        'LANE=0\n'
        'INDEX=DEFAULT_INDEX\n'
    )

    sample = Sample(
        'ABC',
        'X',
        read_1_fastq = '/path/to/ABC_r1_001.fastq.gz',
        read_2_fastq = '/path/to/ABC_r2_001.fastq.gz')
    sample.alignment_dir = '/path/to/aln'
    sample.flowcell_id = 'DEFAULT'
    sample.lane = '0'
    sample.index = 'DEFAULT_INDEX'

    filled = self.module.fill_out_sample_specific_portion(sample, sample_template)
    self.assertEqual(filled, expected_result)
def _parse_vcf_file(self, filename, filter_zero_maf=False):
    """
    Read the given VCF file and attach its passing variants to per-sample objects.

    The '#CHROM' header line defines the column names; every column after
    the ninth is treated as a sample and gets its own Sample object in
    self.samples (which is reset by this call).  A data row is dropped when
    its FILTER column holds any value other than 'PASS'; otherwise one
    variant per sample column is generated and added to that sample.

    :param filename: path to the VCF file
    :param filter_zero_maf: if True, variants whose BAF equals 0 are not
        added to the sample
    :raises ValueError: if the '#CHROM' header line has no sample columns
    """
    # FILTER values known to mark a failed variant.  Any other non-PASS value
    # also fails the variant, but is reported as unrecognized first.
    known_failed_filters = (
        'REJECT',
        # variant data did not pass general filtering
        'StrandBiasFilter', 'Mask', 'SnpCluster', 'HARD_TO_VALIDATE',
        # variant did not pass MuTect filtering
        'mf1',
        # variant did not pass GATK filtering
        'GATKStandardFilter',
    )

    with open(filename) as vcf_file:

        logger.info('Reading VCF file {}'.format(filename))
        f_tsv = csv.reader(vcf_file, delimiter='\t')

        # namedtuple class for data rows; created once the header is seen
        named_row = None

        # regex patterns for making column names in VCF files valid identifiers
        p_replace = re.compile(r'( |/)')
        p_reseeded = re.compile(r'<')
        p_remove = re.compile(r'#')

        self.samples = dict()

        for row in f_tsv:
            if not row:
                # csv.reader yields an empty list for a blank line; indexing
                # row[0] below would raise IndexError
                continue

            if row[0].startswith('##'):
                # skip the meta-information
                continue

            elif row[0].startswith('#CHROM'):
                # process VCF header
                headers = [p_replace.sub('_', p_remove.sub('', p_reseeded.sub('Res', e))) for e in row]
                logger.debug('Header: {}'.format(headers))
                named_row = namedtuple('variant', headers)

                if len(row) > 9:
                    # samples are present and hence their format has to specified first
                    logger.info('Found data for {} samples: {}'.format(len(row)-9, headers[9:]))

                    # add identified samples and generate sample objects
                    for sample_name in headers[9:]:
                        self.samples[sample_name] = Sample(sample_name)
                else:
                    raise ValueError('No data is found in the provided VCF file: {}'.format(filename))

            elif row[0].startswith('#'):
                # comment: skip
                continue

            else:
                # process variants
                var = named_row(*row)
                failed_filter = False
                gene_name = None
                var_type = None

                for filter_name in var.FILTER.split(';'):
                    if filter_name == 'PASS':
                        continue
                    if filter_name not in known_failed_filters:
                        logger.warning('Unrecognized filter value: {}'.format(filter_name))
                    failed_filter = True

                for info in var.INFO.split(';'):
                    if info.startswith('SVTYPE='):
                        var_type = 'SV-'+info[7:]
                    elif info.startswith('GN='):
                        # gene name
                        gene_name = info[3:]
                    elif info.startswith('ANN='):
                        # functional annotations
                        var_type = info[4:]

                if failed_filter:
                    # variant did not pass all filters
                    continue

                # variant passed filter and is called for all provided samples
                named_format = namedtuple('sample', var.FORMAT.split(':'))

                # Generate a separate variant for each sample column.
                # (Standard cancer format: the VCF file contains two samples
                # named NORMAL and PRIMARY; NORMAL is currently not skipped.)
                for sa_idx, sa in enumerate(var[9:], 9):
                    try:
                        sample = named_format(*(sa.split(':')))
                        variant = generate_variant(var, sample, gene_name=gene_name, var_type=var_type)

                        # Multiple samples may have been merged into a single
                        # VCF file although almost all point mutations occur
                        # in only one patient, hence the optional exclusion of
                        # variants with a BAF of 0.
                        if not (filter_zero_maf and variant.BAF == 0):
                            self.samples[headers[sa_idx]].add_variant(variant)

                    except TypeError:
                        if sa == './.':
                            logger.debug('No data ({}) for sample {} at chr {} and pos {}'
                                         .format(sa, headers[sa_idx], var.CHROM, var.POS))
                        else:
                            # NOTE(review): the source was flattened; the row
                            # dump and traceback are assumed to belong to this
                            # branch only -- confirm against the original layout.
                            logger.warning('Could not parse data {} for sample {} at chr {} and pos {}'
                                           .format(sa, headers[sa_idx], var.CHROM, var.POS))
                            logger.info('Row {}'.format(row))
                            if logger.isEnabledFor(logging.DEBUG):
                                # use the module logger (not the root logger)
                                # so the traceback goes to the same handlers
                                # as every other message of this parser
                                logger.exception('A variant was not parsed successfully!')

        # NOTE(review): headers is only bound once a '#CHROM' line was read
        for sample_name in headers[9:]:
            logger.debug('{} variants were detected in sample {}.'.format(
                len(self.samples[sample_name].variants), sample_name))
def test_fill_template(self):
    """
    Fill the report template and compile the report against real files:
    figures from the test data and component directories are symlinked into
    the report output directory, and only get_diff_exp_gene_summary is mocked.
    """
    project = Project()
    project.parameters = {
        'bam_filter_level': 'sort.primary',
        'project_directory': 'abc/foo/AB_12345',
        'genome': 'hg19',
        'genome_source_link': 'ftp://ftp.ensembl.org/pub/release-75/fasta/homo_sapiens/dna/',
        'skip_align': False,
        'skip_analysis': False,
    }

    report_cfg = os.path.join(root, 'components', 'pdf_report', 'report.cfg')
    component_params = cp.read_config(report_cfg, 'COMPONENT_SPECIFIC')
    extra_params = cp.read_config(report_cfg, 'STAR')

    # one mock sample per coverage file; its id is the file name up to the first dot
    coverage_files = glob.glob(
        os.path.join('test_data', '*' + component_params.get('coverage_file_suffix')))
    project.samples = [
        Sample(os.path.basename(coverage_file).split('.')[0], 'X')
        for coverage_file in coverage_files
    ]
    project.contrasts = [('X', 'Y'), ('X', 'Z'), ('Y', 'Z')]

    output_dir = os.path.join(
        os.path.abspath(os.path.dirname(__file__)),
        test_output_dir,
        component_params.get('report_output_dir'))
    component_params['report_output_dir'] = output_dir
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    # link figures so they appear where they should be.
    test_data_dir = os.path.join(os.path.dirname(__file__), 'test_data')
    figure_list = glob.glob(
        os.path.join(test_data_dir, '*' + component_params.get('coverage_plot_suffix')))
    figure_list += [
        os.path.join(test_data_dir, figure_name)
        for figure_name in ('bamfile_reads.pdf', 'mapping_composition.pdf', 'total_reads.pdf')
    ]
    figure_list += [
        os.path.join('components', 'pdf_report', figure_name)
        for figure_name in ('igv_typical.png', 'igv_duplicates.png')
    ]
    for figure in figure_list:
        os.symlink(
            os.path.join(root, figure),
            os.path.join(output_dir, os.path.basename(figure)))

    self.module.get_diff_exp_gene_summary = mock.Mock(return_value=[
        ['X', 'Y', 100, 200],
        ['Y_1', 'Z_2', 400, 300],
        ['X_2', 'Z_3', 150, 300],
    ])

    template_dir = os.path.join(root, 'components', 'pdf_report')
    env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir))
    template = env.get_template(component_params.get('report_template'))

    self.module.fill_template(template, project, component_params)
    self.module.compile_report(project, component_params)
def test_system_call_to_bedtools(self):
    """
    calculate_coverage_data is expected to launch bedtools once per sample,
    on the BAM file matching the project's 'bam_filter_level'
    (sort.primary), with the output handle coming from the mocked open().
    """
    project = Project()
    project.parameters = {
        'bam_filter_level': 'sort.primary',
        'project_directory': 'abc/foo/AB_12345',
        'genome': 'hg19',
        'genome_source_link': 'ftp://ftp.ensembl.org/pub/release-75/fasta/homo_sapiens/dna/',
        'skip_align': False,
        'skip_analysis': False,
    }

    mock_dir = '/abc/def/'
    mock_sample_names = ['AAA', 'BBB', 'CCC']
    levels = ['sort.bam', 'sort.primary.bam', 'sort.primary.dedup.bam']

    # every sample gets one BAM file per filter level
    samples = []
    for sample_name in mock_sample_names:
        bamfiles = map(
            lambda level: os.path.join(mock_dir, sample_name + '.' + level),
            levels)
        samples.append(Sample(sample_name, 'X', bamfiles=bamfiles))
    project.samples = samples

    component_params = cp.read_config(
        os.path.join(root, 'components', 'pdf_report', 'report.cfg'),
        'COMPONENT_SPECIFIC')

    # replace Popen with a mock whose process finishes successfully
    self.module.subprocess.Popen = mock.Mock()
    mock_process = mock.Mock()
    mock_process.communicate.return_value = ('abc', 'def')
    mock_process.returncode = 0
    self.module.subprocess.Popen.return_value = mock_process
    self.module.subprocess.STDOUT = 'abc'
    self.module.subprocess.STDERR = 'def'

    m = mock.mock_open()
    with mock.patch.object(__builtin__, 'open', m):
        # per sample: the Popen call followed by communicate() on its result
        expected_calls = []
        for sample_name in mock_sample_names:
            expected_calls.append(
                mock.call(
                    [
                        component_params.get('bedtools_path'),
                        component_params.get('bedtools_cmd'),
                        '-ibam',
                        '/abc/def/%s.sort.primary.bam' % sample_name,
                        '-bga',
                    ],
                    stderr='abc',
                    stdout=m()))
            expected_calls.append(mock.call().communicate())

        self.module.calculate_coverage_data(project, component_params)
        self.module.subprocess.Popen.assert_has_calls(expected_calls)
# Script fragment (flattened onto a single line in this view).
# It counts the visible GPU and CPU devices, prints the counts and installs a
# TF1-compat session configured with those device counts. How many of those
# statements belong to the `if GPUs > 0:` body cannot be recovered from the
# flattened text -- TODO confirm against the original layout.
# It then builds the model with structure(...) and uses a single Sample to
# create and load the 'train' (10**6), 'test' (10**6) and 'validation' (10**5)
# sets, all with log=True.
# The trailing `def step_decay(epoch):` header is cut off: its body is not
# part of this view.
GPUs = len(tf.config.experimental.list_physical_devices('GPU')) CPUs = len(tf.config.experimental.list_physical_devices('CPU')) if GPUs > 0: print("Num GPUs Available: ", GPUs) print("Num CPUs Available: ", CPUs) config = tf.compat.v1.ConfigProto(device_count={'GPU': GPUs, 'CPU': CPUs}) sess = tf.compat.v1.Session(config=config) tf.compat.v1.keras.backend.set_session(sess) model = structure(3, 950, 'mean_squared_error', 'relu', 'random_uniform', 'adam', 0) muestra = Sample(ratio=[0.4, 1.6], T=[0.2, 1.1], r=[0.02, 0.1], o=[0.01, 1.0]) muestra.create('train', 10**6, log=True) x_train, y_train = muestra.open('train', log=True) muestra.create('test', 10**6, log=True) x_test, y_test = muestra.open('test', log=True) muestra.create('validation', 10**5, log=True) x_val, y_val = muestra.open('validation', log=True) def step_decay(epoch):
def test_system_calls_paired_experiment(self):
    """
    For a paired-end project, execute_counting is expected to launch one
    featureCounts command per BAM file through subprocess.Popen (mocked),
    each carrying the '-p' flag, and to record the resulting countfile paths
    on the sample.
    """
    # a process that finishes successfully with empty output
    mock_process = mock.Mock(name='mock_process')
    mock_process.communicate.return_value = ('', '')
    mock_process.returncode = 0
    mock_popen = mock.Mock(name='mock_popen', return_value=mock_process)

    self.module.subprocess = mock.Mock()
    self.module.subprocess.Popen = mock_popen
    self.module.subprocess.STDOUT = ''
    self.module.subprocess.PIPE = ''

    bam_dir = '/path/to/bamdir'
    count_dir = '/path/to/final/featureCounts'
    stems = ['A', 'A.primary', 'A.primary.dedup']

    p = Params()
    cp = Params()
    p.add(gtf='/path/to/GTF/mock.gtf')
    cp.add(feature_counts='/path/to/bin/featureCounts')
    cp.add(feature_counts_file_extension='counts')
    cp.add(feature_counts_output_dir=count_dir)
    p.add(paired_alignment=True)

    s1 = Sample('A', 'X')
    s1.bamfiles = ['%s/%s.bam' % (bam_dir, stem) for stem in stems]

    project = Project()
    project.add_parameters(p)
    project.add_samples([s1])

    # every BAM file is reported as present
    is_file = mock.MagicMock(side_effect=[True, True, True])
    with mock.patch.object(self.module.os.path, 'isfile', is_file):
        self.module.execute_counting(project, cp, util_methods)

    command = ('/path/to/bin/featureCounts -a /path/to/GTF/mock.gtf'
               ' -t exon -g gene_name -p -o %s/%s.counts %s/%s.bam')
    calls = [
        mock.call(
            command % (count_dir, stem, bam_dir, stem),
            shell=True,
            stderr=self.module.subprocess.STDOUT,
            stdout=self.module.subprocess.PIPE)
        for stem in stems
    ]
    mock_popen.assert_has_calls(calls)

    # check that the sample contains paths to the new count files in the correct locations:
    expected_files = [
        os.path.join('/path/to/final/featureCounts',
                     re.sub('bam', 'counts', os.path.basename(bam)))
        for bam in s1.bamfiles
    ]
    self.assertEqual(s1.countfiles, expected_files)
# Script fragment (flattened onto a single line in this view).
# After its imports it creates a Sample, generates and opens the 'prueba' data
# set (N=10**5), records the start time, initializes the counters j and i and
# the list fails, and begins a loop over the rows (c, ratio, r, T) of x_test
# in which f(x) is defined as raiz_ratio(c, ratio, r, x, T). The inline
# Spanish note says that x would be the variable bisection is applied to.
# The rest of the loop body lies beyond this view.
#coding=utf-8 from keras.models import load_model from utils.sample import Sample from utils.black_scholes import raiz_ratio, d1_ratio, call_price_ratio from utils.biseccion import bisec import time import pickle import math import numpy as np from os import path, mkdir, strerror from scipy.optimize import brentq import tensorflow as tf opn = Sample(ratio=[0.4, 1.6], T=[0.2, 1.1], r=[0.02, 0.1], o=[0.01, 1]) opn.create('prueba', N=10**5) x_test, y_test = opn.open('prueba') start_time = time.time() j = 0 fails = [] i = 0 for c, ratio, r, T in x_test: def f(x): return raiz_ratio(c, ratio, r, x, T) #x seria la variable para aplicar bisección
def test_countfile_merging(self):
    """
    Check that merged count matrices are written to the expected files with
    the expected content.

    The grouping of countfiles and the reading of the individual files are
    both mocked out, so only the file locations and the written lines are
    verified.
    """
    # dummy replacement for reading/concatenating the data of the individual files
    def mock_read(matrix, f):
        rows = [
            ['geneA', '0', '100', '200'],
            ['geneB', '1', '101', '201'],
            ['geneC', '2', '102', '202'],
        ]
        if len(matrix) == 0:
            matrix.extend([] for _ in rows)
        # NOTE(review): the source was flattened; this loop is assumed to run
        # on every call, not only when the matrix starts out empty.
        for idx, row in enumerate(rows):
            matrix[idx] = row

    count_dir = '/path/to/final/featureCounts'

    # mock out the actual implementations
    self.module.get_countfile_groupings = mock.Mock(return_value=[
        [
            count_dir + '/A.counts',
            count_dir + '/C.counts',
            count_dir + '/B.counts',
        ],
        [
            count_dir + '/A.primary.counts',
            count_dir + '/C.primary.counts',
        ],
        [
            count_dir + '/A.primary.dedup.counts',
            count_dir + '/B.primary.dedup.counts',
            count_dir + '/C.primary.dedup.counts',
        ],
    ])
    self.module.read = mock_read

    p = Params()
    p.add(raw_count_matrix_file_prefix='merged_counts')

    # the countfiles are listed in a different order for each sample
    s1 = Sample('A', 'X')
    s1.countfiles = [
        count_dir + '/A.primary.counts',
        count_dir + '/A.counts',
        count_dir + '/A.primary.dedup.counts',
    ]
    s2 = Sample('B', 'Y')
    s2.countfiles = [
        count_dir + '/B.counts',
        count_dir + '/B.primary.dedup.counts',
        count_dir + '/B.primary.counts',
    ]
    s3 = Sample('C', 'Z')
    s3.countfiles = [
        count_dir + '/C.counts',
        count_dir + '/C.primary.counts',
        count_dir + '/C.primary.dedup.counts',
    ]

    project = Project()
    project.add_parameters(p)
    project.add_samples([s1, s3, s2])

    m = mock.mock_open()
    with mock.patch.object(__builtin__, 'open', m):
        self.module.create_count_matrices(project, mock.Mock())

    # one merged matrix file per countfile type
    for suffix in ('.counts', '.primary.counts', '.primary.dedup.counts'):
        m.assert_any_call(count_dir + '/merged_counts' + suffix, 'w')

    handle = m()
    lines_per_file = [
        mock.call('Gene\tA\tB\tC\n'),
        mock.call('geneA\t0\t100\t200\n'),
        mock.call('geneB\t1\t101\t201\n'),
        mock.call('geneC\t2\t102\t202\n'),
    ]
    handle.write.assert_has_calls(lines_per_file * 3)
#coding=utf-8
from utils.structure import structure
from utils.learning_rate_finder import LearningRateFinder
from utils.sample import Sample
import pickle
import numpy as np
import matplotlib.pyplot as plt

# Learning-rate range test: build the model, generate a sample of 10**4
# points, sweep the learning rate from 1e-10 to 1e+1 and save the loss plot.
model = structure(3, 950, 'mean_squared_error', 'relu', 'random_uniform',
                  'adam', 0)
lrf = LearningRateFinder(model)

muestra = Sample(ratio=[0.4, 1.6], T=[0.2, 1.1], r=[0.02, 0.1], o=[0.01, 1.0])
muestra.create('sample', 10**4, log=True)
x, y = muestra.open('sample', log=True)

batch_size = 1024
steps_per_epoch = np.ceil(len(x) / float(batch_size))
lrf.find(
    x, y,
    1e-10, 1e+1,
    stepsPerEpoch=steps_per_epoch,
    batchSize=batch_size)

lrf.plot_loss()  # Smith's method
plt.savefig('Learning_Rate')