def test_creates_contrasts_correctly_with_no_contrast_file_given(self):

		#make some samples:
		all_samples = [Sample('A', 'X'), Sample('B', 'Y'), Sample('C', 'Z')]
		p = PipelineBuilder('')
		mock_pipeline_params = Params()
		mock_pipeline_params.add(skip_analysis = False)
		mock_pipeline_params.add(contrast_file = None)
		p.builder_params = mock_pipeline_params
		p.all_samples = all_samples

		p._PipelineBuilder__check_contrast_file()
		expected_result = set([('X','Y'),('X','Z'),('Y','Z')])
		self.assertTrue(set_of_tuples_is_equivalent(p.contrasts, expected_result))
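
The expected contrasts above are just every pairwise combination of the distinct sample conditions. A minimal sketch of that default-contrast logic is shown below; the helper name and the `condition` attribute on Sample are assumptions, since the real __check_contrast_file implementation is not part of this listing.

import itertools

def default_pairwise_contrasts(samples):
    # Collect the distinct condition labels (the second argument given to each
    # Sample above) and emit every unordered pair, matching expected_result.
    conditions = sorted({s.condition for s in samples})  # assumed attribute name
    return set(itertools.combinations(conditions, 2))

# For conditions X, Y, Z this yields {('X', 'Y'), ('X', 'Z'), ('Y', 'Z')}.
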
    def test_group_countfiles_raises_exception_if_missing_type(self):
        """
		Test the method that aggregates all the countfiles generated from each 'type' of bam file.  That is, we may have multiple bam files for each sample (e.g. primary alignments, deduplicated, etc).
		We will be generating a countfile for each one of those.  When we assemble into a count matrix, we obviously group the files of a particular 'type' (e.g. those coming from deduplicated BAM files).
		This tests that the the glob methods are called with the correct parameters given the sample annotations prescribed.

		This one tests that an exception is raised if one of the countfile 'types' is missing.  Here, sample B is missing a countfile corresponding to the primary.counts- based BAM files
		"""

        p = Params()
        p.add(feature_counts_output_dir='/path/to/final/featureCounts')

        s1 = Sample('A', 'X')
        s1.countfiles = [
            '/path/to/final/featureCounts/A.counts',
            '/path/to/final/featureCounts/A.primary.counts',
            '/path/to/final/featureCounts/A.primary.dedup.counts'
        ]
        s2 = Sample('B', 'Y')
        s2.countfiles = [
            '/path/to/final/featureCounts/B.counts',
            '/path/to/final/featureCounts/B.primary.dedup.counts'
        ]
        s3 = Sample('C', 'Z')
        s3.countfiles = [
            '/path/to/final/featureCounts/C.counts',
            '/path/to/final/featureCounts/C.primary.counts',
            '/path/to/final/featureCounts/C.primary.dedup.counts'
        ]

        project = Project()
        project.add_parameters(p)
        project.add_samples([s1, s2, s3])

        mock_util_methods = mock.Mock()
        mock_case_insensitive_glob = mock.Mock()
        mock_case_insensitive_glob.side_effect = [
            [
                '/path/to/final/featureCounts/A.counts',
                '/path/to/final/featureCounts/B.counts',
                '/path/to/final/featureCounts/C.counts'
            ],
            [
                '/path/to/final/featureCounts/A.primary.counts',
                '/path/to/final/featureCounts/C.primary.counts'
            ],
            [
                '/path/to/final/featureCounts/A.primary.dedup.counts',
                '/path/to/final/featureCounts/B.primary.dedup.counts',
                '/path/to/final/featureCounts/C.primary.dedup.counts'
            ]
        ]
        with self.assertRaises(self.module.CountfileQuantityException):
            result = self.module.get_countfile_groupings(
                project, mock_case_insensitive_glob)
    def test_group_countfiles(self):
        """
		Test the method that aggregates all the countfiles generated from each 'type' of bam file.  That is, we may have multiple bam files for each sample (e.g. primary alignments, deduplicated, etc).
		We will be generating a countfile for each one of those.  When we assemble into a count matrix, we obviously group the files of a particular 'type' (e.g. those coming from deduplicated BAM files).
		This tests that the the glob methods are called with the correct parameters given the sample annotations prescribed.
		"""

        p = Params()
        cp = Params()
        cp.add(feature_counts_output_dir='/path/to/final/featureCounts')

        s1 = Sample('A', 'X')
        s1.countfiles = [
            '/path/to/final/featureCounts/A.counts',
            '/path/to/final/featureCounts/A.primary.counts',
            '/path/to/final/featureCounts/A.primary.dedup.counts'
        ]
        s2 = Sample('B', 'Y')
        s2.countfiles = [
            '/path/to/final/featureCounts/B.counts',
            '/path/to/final/featureCounts/B.primary.counts',
            '/path/to/final/featureCounts/B.primary.dedup.counts'
        ]
        s3 = Sample('C', 'Z')
        s3.countfiles = [
            '/path/to/final/featureCounts/C.counts',
            '/path/to/final/featureCounts/C.primary.counts',
            '/path/to/final/featureCounts/C.primary.dedup.counts'
        ]

        project = Project()
        project.add_parameters(p)
        project.add_samples([s1, s2, s3])

        result = self.module.get_countfile_groupings(project, cp)
        expected_result = [
            [
                '/path/to/final/featureCounts/A.counts',
                '/path/to/final/featureCounts/B.counts',
                '/path/to/final/featureCounts/C.counts'
            ],
            [
                '/path/to/final/featureCounts/A.primary.counts',
                '/path/to/final/featureCounts/B.primary.counts',
                '/path/to/final/featureCounts/C.primary.counts'
            ],
            [
                '/path/to/final/featureCounts/A.primary.dedup.counts',
                '/path/to/final/featureCounts/B.primary.dedup.counts',
                '/path/to/final/featureCounts/C.primary.dedup.counts'
            ]
        ]
        self.assertEqual(result, expected_result)
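
Both grouping tests above rely on bucketing each sample's count files by the suffix that follows the sample name. The sketch below illustrates that idea only; it is not the module's get_countfile_groupings, the sample_name/countfiles attribute names are inferred from how the tests use Sample, and a plain ValueError stands in for CountfileQuantityException.

import os
from collections import defaultdict

def group_countfiles_by_suffix(samples):
    # Bucket count files by the filename suffix after the sample name,
    # e.g. '.counts', '.primary.counts', '.primary.dedup.counts'.
    groups = defaultdict(list)
    for s in samples:
        for f in s.countfiles:
            suffix = os.path.basename(f)[len(s.sample_name):]
            groups[suffix].append(f)
    # Every sample must contribute one file to every group; otherwise we are in
    # the 'missing type' situation exercised by the exception test above.
    for suffix, files in groups.items():
        if len(files) != len(samples):
            raise ValueError('Missing countfile(s) for type %s' % suffix)
    return [sorted(files) for _, files in sorted(groups.items())]
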
	def test_raises_exception_if_non_sensible_contrast_specified(self, mock_parse):

		#make some samples:
		all_samples = [Sample('A', 'X'), Sample('B', 'Y'), Sample('C', 'Z')]
		p = PipelineBuilder('')
		mock_pipeline_params = Params()
		mock_pipeline_params.add(skip_analysis = False)
		mock_pipeline_params.add(contrast_file = 'contrast.txt')
		p.builder_params = mock_pipeline_params
		p.all_samples = all_samples

		# note the specification of a contrast of Y against A.  However, we have no samples from condition A.
		mock_parse.return_value = set([('X','Y'),('X','Z'),('Y','A')]) 

		with self.assertRaises(ContrastSpecificationException):
			p._PipelineBuilder__check_contrast_file()
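
The exception path above amounts to checking that both members of every requested contrast name a condition actually present among the samples. A minimal sketch of that validation, under the same assumed `condition` attribute as before and with ValueError standing in for ContrastSpecificationException:

def validate_contrasts(contrasts, samples):
    # Reject any contrast that references a condition no sample belongs to.
    available = {s.condition for s in samples}
    for a, b in contrasts:
        if a not in available or b not in available:
            raise ValueError('Contrast (%s, %s) references an unknown condition' % (a, b))
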
Example no. 5
def get_one_thread_samples(thread, max_n_agents, n_prev_sents, test=False):
    samples = []
    sents = []
    # agents_in_ctx = set([])

    for i, sent in enumerate(thread):
        time = sent[0]
        spk_id = sent[1]
        adr_id = sent[2]
        label = sent[-1]

        if adr_id == '-':
            if len(sents) > 2:
                # TODO: hyperparameter
                if len(sents) > 9:
                    sents = sents[:9]
                sample_json = Sample(sents)
                samples.append(sample_json)
            sents = []
        else:
            responses = sent[3:-1]
            original_sent = get_original_sent(responses, label)
            sents.append((time, spk_id, adr_id, original_sent))

    return samples
Example no. 6
    def test_generate_figures(self):
        """
		This is not a unit test in the conventional sense-- this is a full-scale mockup which will
		create an output pdf and everything.
		"""

        project = Project()
        parameters = {
            'aligner': 'star',
            'skip_align': False,
            'sample_dir_prefix': 'Sample_',
            'alignment_dir': 'aln',
            'project_directory': 'foo',
            'chromosomes': ['chr1', 'chr2', 'chrM']
        }
        project.parameters = parameters

        component_params = cp.read_config(
            os.path.join(root, 'components', 'pdf_report', 'report.cfg'),
            'COMPONENT_SPECIFIC')
        extra_params = cp.read_config(
            os.path.join(root, 'components', 'pdf_report', 'report.cfg'),
            'STAR')

        mock_sample_ids = [
            os.path.basename(x).split('.')[0] for x in glob.glob(
                os.path.join(
                    'test_data', '*' +
                    component_params.get('coverage_file_suffix')))
        ]
        project.samples = [Sample(x, 'X') for x in mock_sample_ids]

        component_params['report_output_dir'] = os.path.join(
            os.path.abspath(os.path.dirname(__file__)), test_output_dir,
            component_params.get('report_output_dir'))
        if not os.path.isdir(component_params['report_output_dir']):
            os.mkdir(component_params['report_output_dir'])

        # link the test files so they 'appear' in the correct location:
        [
            os.symlink(
                os.path.abspath(x),
                os.path.join(component_params['report_output_dir'],
                             os.path.basename(x)))
            for x in glob.glob(
                os.path.join(
                    'test_data', '*' +
                    component_params.get('coverage_file_suffix')))
        ]

        mock_log_data = mock_log_data_structure(project, extra_params)
        self.module.star_methods.process_star_logs = mock.Mock()
        self.module.star_methods.process_star_logs.return_value = mock_log_data

        self.module.get_bam_counts = mock.Mock()
        self.module.get_bam_counts.return_value = mock_bam_counts(
            mock_log_data.keys())
        self.module.calculate_coverage_data = mock.Mock()
        self.module.calculate_coverage_data.return_value = None
        self.module.generate_figures(project, component_params, extra_params)
    def test_system_calls_single_end_experiment(self):
        self.module.subprocess = mock.Mock()

        p = Params()
        p.add(gtf='/path/to/GTF/mock.gtf')
        p.add(feature_counts='/path/to/bin/featureCounts')
        p.add(feature_counts_file_extension='counts')
        p.add(feature_counts_output_dir='/path/to/final/featureCounts')
        p.add(paired_alignment=False)

        s1 = Sample('A', 'X')
        s1.bamfiles = [
            '/path/to/bamdir/A.bam', '/path/to/bamdir/A.primary.bam',
            '/path/to/bamdir/A.primary.dedup.bam'
        ]

        project = Project()
        project.add_parameters(p)
        project.add_samples([s1])

        m = mock.MagicMock(side_effect=[True, True, True])
        path = self.module.os.path
        with mock.patch.object(path, 'isfile', m):
            self.module.execute_counting(project, util_methods)

            calls = [
                mock.call(
                    '/path/to/bin/featureCounts -a /path/to/GTF/mock.gtf -t exon -g gene_name -o /path/to/final/featureCounts/A.counts /path/to/bamdir/A.bam',
                    shell=True),
                mock.call(
                    '/path/to/bin/featureCounts -a /path/to/GTF/mock.gtf -t exon -g gene_name -o /path/to/final/featureCounts/A.primary.counts /path/to/bamdir/A.primary.bam',
                    shell=True),
                mock.call(
                    '/path/to/bin/featureCounts -a /path/to/GTF/mock.gtf -t exon -g gene_name -o /path/to/final/featureCounts/A.primary.dedup.counts /path/to/bamdir/A.primary.dedup.bam',
                    shell=True)
            ]
            self.module.subprocess.check_call.assert_has_calls(calls)

            # check that the sample contains paths to the new count files in the correct locations:
            expected_files = [
                os.path.join('/path/to/final/featureCounts',
                             re.sub('bam', 'counts', os.path.basename(f)))
                for f in s1.bamfiles
            ]
            actual_files = s1.countfiles
            self.assertEqual(actual_files, expected_files)
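
The expected calls in the test above fix the shape of each featureCounts invocation: one command per BAM file, with the output name derived by swapping the bam suffix for the configured counts extension, and a -p flag added only for paired-end experiments (see the paired test further down). A hedged sketch of assembling one such command string follows; it uses a plain dict in place of the Params object and is not the module's actual execute_counting.

import os
import re

def build_featurecounts_cmd(params, bam_path):
    # Derive the output name by replacing the trailing 'bam' with the configured
    # extension, e.g. A.primary.bam -> A.primary.counts.
    out_name = re.sub(r'bam$', params['feature_counts_file_extension'],
                      os.path.basename(bam_path))
    out_path = os.path.join(params['feature_counts_output_dir'], out_name)
    cmd = '%s -a %s -t exon -g gene_name' % (params['feature_counts'], params['gtf'])
    if params.get('paired_alignment'):
        cmd += ' -p'  # count read pairs (fragments) for paired-end data
    return '%s -o %s %s' % (cmd, out_path, bam_path)
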
    def test_bad_bamfile_path_raises_exception(self):

        self.module.subprocess = mock.Mock()

        p = Params()
        p.add(gtf='/path/to/GTF/mock.gtf')
        p.add(feature_counts='/path/to/bin/featureCounts')
        p.add(feature_counts_file_extension='counts')
        p.add(feature_counts_output_dir='/path/to/final/featureCounts')
        p.add(paired_alignment=False)

        s1 = Sample('A', 'X')
        s1.bamfiles = [
            '/path/to/bamdir/A.bam', '/path/to/bamdir/A.primary.bam',
            '/path/to/bamdir/A.primary.dedup.bam'
        ]
        s2 = Sample('B', 'X')
        s2.bamfiles = ['/path/to/bamdir/B.bam', '/bad/path/B.sort.bam']

        project = Project()
        project.add_parameters(p)
        project.add_samples([s1, s2])

        m = mock.MagicMock(side_effect=[True, True, True, True, False])
        path = self.module.os.path
        with mock.patch.object(path, 'isfile', m):
            with self.assertRaises(self.module.MissingBamFileException):
                self.module.execute_counting(project, util_methods)
	def test_sample_specific_template_injected_correctly_for_paired_alignment(self):
		sample_template = 'FASTQFILEA=%FASTQFILEA%\nFASTQFILEB=%FASTQFILEB%\nSAMPLE_NAME=%SAMPLE_NAME%\nPAIRED=%PAIRED%\nOUTDIR=%OUTDIR%\nFCID=%FCID%\nLANE=%LANE%\nINDEX=%INDEX%\n'
		expected_result = 'FASTQFILEA=/path/to/ABC_r1_001.fastq.gz\nFASTQFILEB=/path/to/ABC_r2_001.fastq.gz\nSAMPLE_NAME=ABC\nPAIRED=1\nOUTDIR=/path/to/aln\nFCID=DEFAULT\nLANE=0\nINDEX=DEFAULT_INDEX\n'
		s = Sample('ABC', 'X', read_1_fastq = '/path/to/ABC_r1_001.fastq.gz', read_2_fastq = '/path/to/ABC_r2_001.fastq.gz')
		s.alignment_dir = '/path/to/aln'
		s.flowcell_id = 'DEFAULT'
		s.lane = '0'
		s.index = 'DEFAULT_INDEX'
		result = self.module.fill_out_sample_specific_portion(s, sample_template)
		self.assertEqual(result, expected_result)
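
The template test above pairs each %PLACEHOLDER% token with one field of the Sample. A minimal sketch of that substitution is given below, assuming plain string replacement and guessing the sample_name attribute and the paired-flag derivation; the real fill_out_sample_specific_portion may work differently.

def fill_sample_template(sample, template):
    # Map each placeholder token to the corresponding Sample attribute,
    # mirroring the expected_result layout in the test above.
    substitutions = {
        '%FASTQFILEA%': sample.read_1_fastq,
        '%FASTQFILEB%': sample.read_2_fastq,
        '%SAMPLE_NAME%': sample.sample_name,   # assumed attribute name
        '%PAIRED%': '1' if sample.read_2_fastq else '0',  # assumption: paired flag from presence of read 2
        '%OUTDIR%': sample.alignment_dir,
        '%FCID%': sample.flowcell_id,
        '%LANE%': sample.lane,
        '%INDEX%': sample.index,
    }
    for token, value in substitutions.items():
        template = template.replace(token, value)
    return template
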
Example no. 10
    def _parse_vcf_file(self, filename, filter_zero_maf=False):
        """
        Read in and process the given VCF file
        :param filename: path to the VCF file
        """

        with open(filename) as vcf_file:
            logger.info('Reading VCF file {}'.format(filename))
            f_tsv = csv.reader(vcf_file, delimiter='\t')

            # process rows in VCF file
            named_row = None

            # regex patterns for making column names in VCF files valid identifiers
            p_replace = re.compile(r'( |/)')
            p_reseeded = re.compile(r'<')
            p_remove = re.compile(r'#')

            self.samples = dict()

            for row in f_tsv:
                if row[0].startswith('##'):
                    # skip the meta-information
                    continue

                elif row[0].startswith('#CHROM'):                # process VCF header
                    headers = [p_replace.sub('_', p_remove.sub('', p_reseeded.sub('Res', e))) for e in row]

                    logger.debug('Header: {}'.format(headers))
                    named_row = namedtuple('variant', headers)

                    if len(row) > 9:        # samples are present and hence their format has to be specified first
                        logger.info('Found data for {} samples: {}'.format(len(row)-9, headers[9:]))

                        # add identified samples and generate sample objects
                        for sample_name in headers[9:]:
                            self.samples[sample_name] = Sample(sample_name)

                    else:
                        raise ValueError('No sample data found in the provided VCF file: {}'.format(filename))

                elif row[0].startswith('#'):                # comment
                    # skip
                    continue

                else:                                       # process variants
                    var = named_row(*row)

                    failed_filter = False
                    gene_name = None
                    var_type = None

                    for filter_name in var.FILTER.split(';'):

                        if filter_name == 'PASS':
                            continue
                        elif filter_name == 'REJECT':
                            failed_filter = True
                        elif filter_name == 'StrandBiasFilter' \
                                or filter_name == 'Mask' \
                                or filter_name == 'SnpCluster' \
                                or filter_name == 'HARD_TO_VALIDATE':     # Variant data did not pass general filtering
                            # logger.debug('Mutation at chr {} and pos {} did not pass the filtering.'
                            #               .format(r.CHROM, r.POS))
                            failed_filter = True

                        elif filter_name == 'mf1':               # variant did not pass MuTect filtering
                            failed_filter = True
                        elif filter_name == 'GATKStandardFilter':               # variant did not pass GATK filtering
                            failed_filter = True
                        else:
                            logger.warning('Unrecognized filter value: {}'.format(filter_name))
                            failed_filter = True

                    for info in var.INFO.split(';'):
                        # if info == 'SVTYPE=DUP' or info == 'SVTYPE=DEL':
                            # failed_filter = True
                        if info.startswith('SVTYPE='):
                            var_type = 'SV-'+info[7:]
                        elif info.startswith('GN='):        # gene name
                            gene_name = info[3:]
                        elif info.startswith('ANN='):       # functional annotations
                            var_type = info[4:]
                    if failed_filter:
                        # variant did not pass all filters
                        continue

                    # variant passed filter and is called for all provided samples
                    named_format = namedtuple('sample', var.FORMAT.split(':'))

                    # generate separate variant dictionaries for each sample
                    for sa_idx, sa in enumerate(var[9:], 9):

                        # Standard cancer format: VCF files contain two samples named NORMAL and PRIMARY
                        # if 'NORMAL' in headers and 'PRIMARY' in headers, NORMAL could be skipped
                        # if headers[sa_idx] == 'NORMAL':
                        #     continue

                        try:
                            sample = named_format(*(sa.split(':')))

                            variant = generate_variant(var, sample, gene_name=gene_name, var_type=var_type)

                            # the reason for this is that multiple samples have been merged into
                            # a single VCF file, but almost all point mutations occur in only one patient
                            if filter_zero_maf and variant.BAF == 0:
                                # logger.warn('Excluded variant {} since its BAF is 0.'.format(str(variant)))
                                # self.variants[headers[sa_idx]][(variant.CHROM, variant.POS)] = variant
                                pass
                            else:
                                # add variant to dictionary
                                # self.variants[headers[sa_idx]][(variant.CHROM, variant.POS)] = variant
                                self.samples[headers[sa_idx]].add_variant(variant)

                            # logger.debug(row)

                        except TypeError:
                            if sa == './.':
                                logger.debug('No data ({}) for sample {} at chr {} and pos {}'
                                             .format(sa, headers[sa_idx], var.CHROM, var.POS))
                            else:
                                logger.warning('Could not parse data {} for sample {} at chr {} and pos {}'
                                               .format(sa, headers[sa_idx], var.CHROM, var.POS))
                                logger.info('Row {}'.format(row))

                                if logger.isEnabledFor(logging.DEBUG):
                                    logging.exception('A variant was not parsed successfully!')

            for sample_name in headers[9:]:
                logger.debug('{} variants were detected in sample {}.'.format(
                    len(self.samples[sample_name].variants), sample_name))
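
The three regular expressions near the top of _parse_vcf_file exist only to turn raw VCF header fields into valid namedtuple attribute names. A small worked example of that sanitisation on a typical header row:

import re
from collections import namedtuple

p_replace = re.compile(r'( |/)')   # spaces and slashes become underscores
p_reseeded = re.compile(r'<')      # '<' becomes 'Res'
p_remove = re.compile(r'#')        # '#' is dropped

row = ['#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER',
       'INFO', 'FORMAT', 'NORMAL', 'PRIMARY']
headers = [p_replace.sub('_', p_remove.sub('', p_reseeded.sub('Res', e))) for e in row]
# headers[0] is now 'CHROM', so namedtuple('variant', headers) is legal.
variant = namedtuple('variant', headers)
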
Example no. 11
    def test_fill_template(self):

        project = Project()
        parameters = {
            'bam_filter_level': 'sort.primary',
            'project_directory': 'abc/foo/AB_12345',
            'genome': 'hg19',
            'genome_source_link':
            'ftp://ftp.ensembl.org/pub/release-75/fasta/homo_sapiens/dna/',
            'skip_align': False,
            'skip_analysis': False
        }

        project.parameters = parameters

        component_params = cp.read_config(
            os.path.join(root, 'components', 'pdf_report', 'report.cfg'),
            'COMPONENT_SPECIFIC')
        extra_params = cp.read_config(
            os.path.join(root, 'components', 'pdf_report', 'report.cfg'),
            'STAR')

        mock_sample_ids = [
            os.path.basename(x).split('.')[0] for x in glob.glob(
                os.path.join(
                    'test_data', '*' +
                    component_params.get('coverage_file_suffix')))
        ]
        project.samples = [Sample(x, 'X') for x in mock_sample_ids]
        project.contrasts = [('X', 'Y'), ('X', 'Z'), ('Y', 'Z')]

        component_params['report_output_dir'] = os.path.join(
            os.path.abspath(os.path.dirname(__file__)), test_output_dir,
            component_params.get('report_output_dir'))
        if not os.path.isdir(component_params['report_output_dir']):
            os.mkdir(component_params['report_output_dir'])

        # link figures so they appear where they should be.
        figure_list = glob.glob(
            os.path.join(os.path.dirname(__file__), 'test_data',
                         '*' + component_params.get('coverage_plot_suffix')))
        figure_list += [
            os.path.join(os.path.dirname(__file__), 'test_data',
                         'bamfile_reads.pdf'),
            os.path.join(os.path.dirname(__file__), 'test_data',
                         'mapping_composition.pdf'),
            os.path.join(os.path.dirname(__file__), 'test_data',
                         'total_reads.pdf'),
            os.path.join('components', 'pdf_report', 'igv_typical.png'),
            os.path.join('components', 'pdf_report', 'igv_duplicates.png')
        ]
        [
            os.symlink(
                os.path.join(root, f),
                os.path.join(component_params['report_output_dir'],
                             os.path.basename(f))) for f in figure_list
        ]

        self.module.get_diff_exp_gene_summary = mock.Mock()
        self.module.get_diff_exp_gene_summary.return_value = [[
            'X', 'Y', 100, 200
        ], ['Y_1', 'Z_2', 400, 300], ['X_2', 'Z_3', 150, 300]]

        env = jinja2.Environment(loader=jinja2.FileSystemLoader(
            os.path.join(root, 'components', 'pdf_report')))
        template = env.get_template(component_params.get('report_template'))

        self.module.fill_template(template, project, component_params)
        self.module.compile_report(project, component_params)
Example no. 12
    def test_system_call_to_bedtools(self):

        project = Project()
        parameters = {
            'bam_filter_level': 'sort.primary',
            'project_directory': 'abc/foo/AB_12345',
            'genome': 'hg19',
            'genome_source_link':
            'ftp://ftp.ensembl.org/pub/release-75/fasta/homo_sapiens/dna/',
            'skip_align': False,
            'skip_analysis': False
        }

        project.parameters = parameters

        mock_dir = '/abc/def/'
        mock_sample_names = ['AAA', 'BBB', 'CCC']
        levels = ['sort.bam', 'sort.primary.bam', 'sort.primary.dedup.bam']

        all_samples = []
        for sn in mock_sample_names:
            bamfiles = map(lambda x: os.path.join(mock_dir, sn + '.' + x),
                           levels)
            s = Sample(sn, 'X', bamfiles=bamfiles)
            all_samples.append(s)

        project.samples = all_samples

        component_params = cp.read_config(
            os.path.join(root, 'components', 'pdf_report', 'report.cfg'),
            'COMPONENT_SPECIFIC')

        self.module.subprocess.Popen = mock.Mock()

        mock_process = mock.Mock()
        mock_process.communicate.return_value = (('abc', 'def'))
        mock_process.returncode = 0
        self.module.subprocess.Popen.return_value = mock_process
        self.module.subprocess.STDOUT = 'abc'
        self.module.subprocess.STDERR = 'def'

        m = mock.mock_open()
        with mock.patch.object(__builtin__, 'open', m) as x:
            expected_calls = [
                mock.call([
                    component_params.get('bedtools_path'),
                    component_params.get('bedtools_cmd'), '-ibam',
                    '/abc/def/AAA.sort.primary.bam', '-bga'
                ],
                          stderr='abc',
                          stdout=m()),
                mock.call().communicate(),
                mock.call([
                    component_params.get('bedtools_path'),
                    component_params.get('bedtools_cmd'), '-ibam',
                    '/abc/def/BBB.sort.primary.bam', '-bga'
                ],
                          stderr='abc',
                          stdout=m()),
                mock.call().communicate(),
                mock.call([
                    component_params.get('bedtools_path'),
                    component_params.get('bedtools_cmd'), '-ibam',
                    '/abc/def/CCC.sort.primary.bam', '-bga'
                ],
                          stderr='abc',
                          stdout=m()),
                mock.call().communicate()
            ]
            self.module.calculate_coverage_data(project, component_params)

        self.module.subprocess.Popen.assert_has_calls(expected_calls)
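
The expected_calls above spell out the coverage command: bedtools is run once per filtered BAM file with its BedGraph (-bga) output written to a per-sample file and stderr folded into stdout. A hedged sketch of one such invocation, reusing the bedtools_path and bedtools_cmd keys from component_params; this is not the module's calculate_coverage_data.

import subprocess

def run_coverage(component_params, bam_path, out_path):
    # Mirrors the call shape asserted above: bedtools <cmd> -ibam <bam> -bga.
    args = [component_params.get('bedtools_path'),
            component_params.get('bedtools_cmd'),
            '-ibam', bam_path, '-bga']
    with open(out_path, 'w') as out:
        proc = subprocess.Popen(args, stdout=out, stderr=subprocess.STDOUT)
        proc.communicate()
    return proc.returncode
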
Example no. 13
GPUs = len(tf.config.experimental.list_physical_devices('GPU'))
CPUs = len(tf.config.experimental.list_physical_devices('CPU'))

if GPUs > 0:

    print("Num GPUs Available: ", GPUs)
    print("Num CPUs Available: ", CPUs)
    config = tf.compat.v1.ConfigProto(device_count={'GPU': GPUs, 'CPU': CPUs})
    sess = tf.compat.v1.Session(config=config)
    tf.compat.v1.keras.backend.set_session(sess)

model = structure(3, 950, 'mean_squared_error', 'relu', 'random_uniform',
                  'adam', 0)

muestra = Sample(ratio=[0.4, 1.6], T=[0.2, 1.1], r=[0.02, 0.1], o=[0.01, 1.0])

muestra.create('train', 10**6, log=True)

x_train, y_train = muestra.open('train', log=True)

muestra.create('test', 10**6, log=True)

x_test, y_test = muestra.open('test', log=True)

muestra.create('validation', 10**5, log=True)

x_val, y_val = muestra.open('validation', log=True)


def step_decay(epoch):
    pass  # body truncated in the source

Example no. 14
    def test_system_calls_paired_experiment(self):

        mock_process = mock.Mock(name='mock_process')
        mock_process.communicate.return_value = (('', ''))
        mock_process.returncode = 0

        mock_popen = mock.Mock(name='mock_popen')
        mock_popen.return_value = mock_process

        self.module.subprocess = mock.Mock()
        self.module.subprocess.Popen = mock_popen
        self.module.subprocess.STDOUT = ''
        self.module.subprocess.PIPE = ''

        p = Params()
        cp = Params()
        p.add(gtf='/path/to/GTF/mock.gtf')
        cp.add(feature_counts='/path/to/bin/featureCounts')
        cp.add(feature_counts_file_extension='counts')
        cp.add(feature_counts_output_dir='/path/to/final/featureCounts')
        p.add(paired_alignment=True)

        s1 = Sample('A', 'X')
        s1.bamfiles = [
            '/path/to/bamdir/A.bam', '/path/to/bamdir/A.primary.bam',
            '/path/to/bamdir/A.primary.dedup.bam'
        ]

        project = Project()
        project.add_parameters(p)
        project.add_samples([s1])

        m = mock.MagicMock(side_effect=[True, True, True])
        path = self.module.os.path
        with mock.patch.object(path, 'isfile', m):
            self.module.execute_counting(project, cp, util_methods)

            calls = [
                mock.call(
                    '/path/to/bin/featureCounts -a /path/to/GTF/mock.gtf -t exon -g gene_name -p -o /path/to/final/featureCounts/A.counts /path/to/bamdir/A.bam',
                    shell=True,
                    stderr=self.module.subprocess.STDOUT,
                    stdout=self.module.subprocess.PIPE),
                mock.call(
                    '/path/to/bin/featureCounts -a /path/to/GTF/mock.gtf -t exon -g gene_name -p -o /path/to/final/featureCounts/A.primary.counts /path/to/bamdir/A.primary.bam',
                    shell=True,
                    stderr=self.module.subprocess.STDOUT,
                    stdout=self.module.subprocess.PIPE),
                mock.call(
                    '/path/to/bin/featureCounts -a /path/to/GTF/mock.gtf -t exon -g gene_name -p -o /path/to/final/featureCounts/A.primary.dedup.counts /path/to/bamdir/A.primary.dedup.bam',
                    shell=True,
                    stderr=self.module.subprocess.STDOUT,
                    stdout=self.module.subprocess.PIPE)
            ]
            mock_popen.assert_has_calls(calls)

        # check that the sample contains paths to the new count files in the correct locations:
        expected_files = [
            os.path.join('/path/to/final/featureCounts',
                         re.sub('bam', 'counts', os.path.basename(f)))
            for f in s1.bamfiles
        ]
        actual_files = s1.countfiles
        self.assertEqual(actual_files, expected_files)
Example no. 15
#coding=utf-8
from keras.models import load_model
from utils.sample import Sample
from utils.black_scholes import raiz_ratio, d1_ratio, call_price_ratio
from utils.biseccion import bisec
import time
import pickle
import math
import numpy as np
from os import path, mkdir, strerror
from scipy.optimize import brentq
import tensorflow as tf



opn = Sample(ratio=[0.4, 1.6], T=[0.2, 1.1], r=[0.02, 0.1], o=[0.01, 1])

opn.create('prueba', N=10**5)
x_test, y_test = opn.open('prueba')

start_time = time.time()

j = 0
fails = []
i = 0
for c, ratio, r, T in x_test:

    def f(x):
        return raiz_ratio(c, ratio, r, x, T)  # x is the variable over which bisection is applied


Example no. 16
    def test_countfile_merging(self):
        """
		This tests that the correct files are used to merge.  The result (a data structure) of the merging is mocked out.
		Tests that the expected data is written to the file and tests that the file ends up in the correct location
		"""

        # a dummy method to mock the reading/concatenating of the data in the individual files
        def mock_read(matrix, f):
            dummy = [['geneA', '0', '100',
                      '200'], ['geneB', '1', '101', '201'],
                     ['geneC', '2', '102', '202']]
            if len(matrix) == 0:
                for k in range(len(dummy)):
                    matrix.append([])

            for i, l in enumerate(dummy):
                matrix[i] = l

        # mock out the actual implementations
        self.module.get_countfile_groupings = mock.Mock()

        self.module.get_countfile_groupings.return_value = [
            [
                '/path/to/final/featureCounts/A.counts',
                '/path/to/final/featureCounts/C.counts',
                '/path/to/final/featureCounts/B.counts'
            ],
            [
                '/path/to/final/featureCounts/A.primary.counts',
                '/path/to/final/featureCounts/C.primary.counts'
            ],
            [
                '/path/to/final/featureCounts/A.primary.dedup.counts',
                '/path/to/final/featureCounts/B.primary.dedup.counts',
                '/path/to/final/featureCounts/C.primary.dedup.counts'
            ]
        ]

        self.module.read = mock_read

        p = Params()
        p.add(raw_count_matrix_file_prefix='merged_counts')

        s1 = Sample('A', 'X')
        s1.countfiles = [
            '/path/to/final/featureCounts/A.primary.counts',
            '/path/to/final/featureCounts/A.counts',
            '/path/to/final/featureCounts/A.primary.dedup.counts'
        ]
        s2 = Sample('B', 'Y')
        s2.countfiles = [
            '/path/to/final/featureCounts/B.counts',
            '/path/to/final/featureCounts/B.primary.dedup.counts',
            '/path/to/final/featureCounts/B.primary.counts'
        ]
        s3 = Sample('C', 'Z')
        s3.countfiles = [
            '/path/to/final/featureCounts/C.counts',
            '/path/to/final/featureCounts/C.primary.counts',
            '/path/to/final/featureCounts/C.primary.dedup.counts'
        ]

        project = Project()
        project.add_parameters(p)
        project.add_samples([s1, s3, s2])

        m = mock.mock_open()
        with mock.patch.object(__builtin__, 'open', m):
            self.module.create_count_matrices(project, mock.Mock())
            m.assert_any_call(
                '/path/to/final/featureCounts/merged_counts.counts', 'w')
            m.assert_any_call(
                '/path/to/final/featureCounts/merged_counts.primary.counts',
                'w')
            m.assert_any_call(
                '/path/to/final/featureCounts/merged_counts.primary.dedup.counts',
                'w')
            handle = m()
            calls = [
                mock.call('Gene\tA\tB\tC\n'),
                mock.call('geneA\t0\t100\t200\n'),
                mock.call('geneB\t1\t101\t201\n'),
                mock.call('geneC\t2\t102\t202\n')
            ] * 3
            handle.write.assert_has_calls(calls)
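
The assertions above define the merged-matrix format: one output file per count-file group, a header line built from the sorted sample names, and one tab-separated row per gene. A minimal sketch of writing one such matrix from an already-assembled row structure (not the module's create_count_matrices):

def write_count_matrix(out_path, sample_names, matrix):
    # matrix rows look like ['geneA', '0', '100', '200'], as in mock_read above.
    with open(out_path, 'w') as fout:
        fout.write('Gene\t' + '\t'.join(sorted(sample_names)) + '\n')
        for row in matrix:
            fout.write('\t'.join(row) + '\n')
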
Example no. 17
#coding=utf-8

from utils.structure import structure
from utils.learning_rate_finder import LearningRateFinder
from utils.sample import Sample
import pickle
import numpy as np
import matplotlib.pyplot as plt

model = structure(3, 950, 'mean_squared_error', 'relu', 'random_uniform', 'adam', 0)

lrf = LearningRateFinder(model)

muestra = Sample(ratio=[0.4, 1.6], T=[0.2, 1.1], r=[0.02, 0.1], o=[0.01, 1.0])

muestra.create('sample', 10**4, log=True)

x, y = muestra.open('sample', log=True)

lrf.find(
    x, y,
    1e-10, 1e+1,
    stepsPerEpoch=np.ceil((len(x) / float(1024))),
    batchSize=1024)

lrf.plot_loss()
# Smith's method (learning-rate range test)
plt.savefig('Learning_Rate')