def create_ref_genome_from_browser_upload(request):
    """Handle request to create ReferenceGenome from a browser-uploaded file.

    Reads ``projectUid``, ``refGenomeLabel`` and ``importFileFormat`` from
    ``request.POST`` and the upload from ``request.FILES['refGenomeFile']``.
    The upload is written to a temp file under ``settings.TEMP_FILE_ROOT``
    and then passed to ``import_reference_genome_from_local_file`` with
    ``move=True``.

    Returns:
        HttpResponse whose JSON body is ``{"error": <str>}``; the string is
        empty when the import raised no exception, otherwise it is the
        exception message.

    Raises:
        Http404 (from get_object_or_404) when no Project with the given uid
        is owned by the requesting user.
    """
    project = get_object_or_404(Project, owner=request.user.get_profile(),
            uid=request.POST['projectUid'])

    uploaded_file = request.FILES['refGenomeFile']

    # Save uploaded ReferenceGenome to temp file, passing the original filename
    # as the suffix for easier debug.
    if not os.path.exists(settings.TEMP_FILE_ROOT):
        os.mkdir(settings.TEMP_FILE_ROOT)
    temp_fd, temp_file_location = tempfile.mkstemp(
        suffix='_' + uploaded_file.name,
        dir=settings.TEMP_FILE_ROOT)

    # mkstemp() returns an already-open OS-level descriptor. Wrap it so that
    # it is closed when the block exits rather than leaking one descriptor
    # per upload. Binary mode leaves the uploaded bytes untouched, and
    # chunks() avoids holding the whole upload in memory at once.
    with os.fdopen(temp_fd, 'wb') as temp_file_fh:
        for chunk in uploaded_file.chunks():
            temp_file_fh.write(chunk)

    error_string = ''
    try:
        import_reference_genome_from_local_file(
                project,
                request.POST['refGenomeLabel'],
                temp_file_location,
                request.POST['importFileFormat'],
                move=True)
    except Exception as e:
        # Any import failure is reported to the client in the JSON payload
        # rather than surfacing as a server error.
        error_string = str(e)

    result = {
        'error': error_string,
    }

    return HttpResponse(json.dumps(result), content_type='application/json')
    def test_import_reference_genome_from_local_file(self):
        """Import the mini MG1655 GenBank file; passes if nothing is raised.
        """
        genbank_path = os.path.join(
                settings.PWD, 'test_data', 'import_util_test_data',
                'mini_mg1655.genbank')

        import_reference_genome_from_local_file(
                self.project, 'a label', genbank_path, 'genbank')
    def test_import_reference_genome_from_local_file(self):
        """Smoke test: importing a GenBank reference genome should not raise.
        """
        test_data_dir = os.path.join(
                settings.PWD, 'test_data', 'import_util_test_data')
        mini_genbank = os.path.join(test_data_dir, 'mini_mg1655.genbank')

        import_reference_genome_from_local_file(
                self.project, 'a label', mini_genbank, 'genbank')
    def test_import_reference_genome_from_local_file__fail_if_no_seq(self):
        """Importing a GenBank file without sequence must raise
        DataImportError.
        """
        no_seq_genbank = os.path.join(
                settings.PWD, 'test_data', 'import_util_test_data',
                'mg1655_no_seq.genbank')

        with self.assertRaises(DataImportError):
            import_reference_genome_from_local_file(
                    self.project, 'a label', no_seq_genbank, 'genbank')
    def test_import_reference_genome_from_local_file__fail_if_no_seq(self):
        """Expect DataImportError when the GenBank file has no sequence.
        """
        test_data_dir = os.path.join(
                settings.PWD, 'test_data', 'import_util_test_data')
        genbank_without_seq = os.path.join(
                test_data_dir, 'mg1655_no_seq.genbank')

        with self.assertRaises(DataImportError):
            import_reference_genome_from_local_file(
                    self.project, 'a label', genbank_without_seq, 'genbank')
    def setUp(self):
        """Create a user, a project, two reference genomes and the paths to
        the known-substitutions test data used by the tests.
        """
        test_user = User.objects.create_user(
            'test_username', password='******', email='*****@*****.**')

        self.project = Project.objects.create(
            title='test project', owner=test_user.get_profile())

        # Reference genome imported from the shared test FASTA.
        self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_FASTA, 'fasta')

        self.EXPECTED_NUM_VARIANTS = 30

        # The test genome was created with a variant every 20 bases starting
        # at position 800 (one-indexed).
        self.EXPECTED_VARIANT_POSITIONS = [
            800 + 20 * offset
            for offset in range(self.EXPECTED_NUM_VARIANTS)]

        substitutions_root = os.path.join(
            settings.PWD, 'test_data', 'test_genome_known_substitutions')
        self.KNOWN_SUBSTITUTIONS_ROOT = substitutions_root

        def _in_root(filename):
            """Return the path of filename inside the substitutions root."""
            return os.path.join(substitutions_root, filename)

        self.TEST_GENOME_FASTA = _in_root(
            'test_genome_known_substitutions.fa')
        self.FAKE_READS_FASTQ1 = _in_root(
            'test_genome_known_substitutions_0.snps.simLibrary.1.fq')
        self.FAKE_READS_FASTQ2 = _in_root(
            'test_genome_known_substitutions_0.snps.simLibrary.2.fq')

        self.FAKE_READS_SAMPLE_UID = '93b68da4'

        self.FAKE_READS_BAM = _in_root(
            'bwa_align.sorted.grouped.realigned.bam')
        self.FAKE_READS_BAM_INDEX = _in_root(
            'bwa_align.sorted.grouped.realigned.bam.bai')

        # Second reference genome, built from the known-substitutions FASTA.
        self.REFERENCE_GENOME = import_reference_genome_from_local_file(
            self.project, 'test_genome', self.TEST_GENOME_FASTA, 'fasta')
    def setUp(self):
        """Create a project with a reference genome, two paired-end samples
        and one sample that only has a single fastq.
        """
        test_user = User.objects.create_user(
                TEST_USERNAME, password=TEST_PASSWORD, email=TEST_EMAIL)
        self.project = Project.objects.create(
                owner=test_user.get_profile(), title='Test Project')

        self.reference_genome = import_reference_genome_from_local_file(
                self.project, 'ref_genome', TEST_FASTA, 'fasta')

        # First sample with both read files.
        self.experiment_sample = ExperimentSample.objects.create(
                project=self.project, label='sample1')
        for dataset_type, fastq_path in (
                (Dataset.TYPE.FASTQ1, TEST_FASTQ1),
                (Dataset.TYPE.FASTQ2, TEST_FASTQ2)):
            copy_and_add_dataset_source(
                    self.experiment_sample, dataset_type, dataset_type,
                    fastq_path)

        # Second sample with both read files.
        self.experiment_sample_2 = ExperimentSample.objects.create(
                project=self.project, label='sample2')
        for dataset_type, fastq_path in (
                (Dataset.TYPE.FASTQ1, TEST_SAMPLE_2_FASTQ1),
                (Dataset.TYPE.FASTQ2, TEST_SAMPLE_2_FASTQ2)):
            copy_and_add_dataset_source(
                    self.experiment_sample_2, dataset_type, dataset_type,
                    fastq_path)

        # Third sample gets only the first fastq.
        self.experiment_sample_single_fastq = ExperimentSample.objects.create(
                project=self.project, label='sample_single_fastq')
        copy_and_add_dataset_source(
                self.experiment_sample_single_fastq,
                Dataset.TYPE.FASTQ1, Dataset.TYPE.FASTQ1, TEST_FASTQ1)
Beispiel #8
0
    def test_get_insert_size__generated_data(self):
        """Check insert size mean and stdev computed for the inversion test
        BAM (expected about 498 and 1 respectively).
        """
        data_dir = os.path.join(
            TEST_DATA_DIR, 'sv_testing', 'inversion_5a996d78')
        ref_fasta = os.path.join(data_dir, 'small_ref.fa')
        sample_uid = 'group'
        sample_bam = os.path.join(data_dir, 'inversion_5a996d78.bam')

        genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', ref_fasta, 'fasta')
        group = AlignmentGroup.objects.create(
            label='test alignment', reference_genome=genome)

        created = create_sample_and_alignment(
            self.project, group, sample_uid, sample_bam)

        mean, stdev = get_insert_size_mean_and_stdev(
            created['sample_alignment'])
        self.assertAlmostEqual(mean, 498, delta=2)
        self.assertAlmostEqual(stdev, 1, delta=1)
    def _import_reference(self, ref_path):
        """Import the FASTA at ref_path into self.project under the label
        'test_ref' (with move=False) and return the importer's result.
        """
        return import_reference_genome_from_local_file(
                self.project, 'test_ref', ref_path, 'fasta', move=False)
    def _import_reference(self, ref_path):
        """Return the reference genome imported from the FASTA at ref_path.

        The genome is added to self.project with label 'test_ref'; the
        importer is called with move=False.
        """
        imported = import_reference_genome_from_local_file(
                self.project, 'test_ref', ref_path, 'fasta', move=False)
        return imported
Beispiel #11
0
    def test_compress_dataset(self):
        """
        Make sure that compressing a dataset and putting a new dataset
        entry into the db works correctly.

        Imports a GenBank reference genome, compresses its GenBank dataset
        with make_compressed('.gz'), and checks that looking up the
        compressed dataset through the reference genome yields an equal
        dataset.
        """
        user = User.objects.create_user(TEST_USERNAME,
                                        password=TEST_PASSWORD,
                                        email=TEST_EMAIL)

        self.test_project = Project.objects.create(title=TEST_PROJECT_NAME,
                                                   owner=user.get_profile())

        self.test_ref_genome = import_reference_genome_from_local_file(
            self.test_project, TEST_REF_GENOME_NAME, TEST_REF_GENOME_PATH,
            'genbank')

        dataset = get_dataset_with_type(
            self.test_ref_genome, type=Dataset.TYPE.REFERENCE_GENOME_GENBANK)

        # All the magic happens here
        compressed_dataset = dataset.make_compressed('.gz')

        # Grab the new compressed dataset through the ref genome to
        # make sure that it got added
        compressed_dataset_through_ref_genome = get_dataset_with_type(
            entity=self.test_ref_genome,
            type=Dataset.TYPE.REFERENCE_GENOME_GENBANK,
            compressed=True)

        # Use the TestCase assertion instead of a bare ``assert`` so the check
        # still runs under ``python -O`` and reports both values on failure.
        self.assertEqual(
            compressed_dataset, compressed_dataset_through_ref_genome)
    def setUp(self):
        """Create the project, GenBank reference genome, alignment group,
        sample and unannotated VCF dataset used by the tests.
        """
        test_user = User.objects.create_user(
            'test_username', password='******', email='*****@*****.**')

        self.project = Project.objects.create(
            title='snpeff test project', owner=test_user.get_profile())

        self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'snpeff test ref genome', TEST_GENBANK, 'genbank')

        self.alignment_group = AlignmentGroup.objects.create(
            label='test alignment', reference_genome=self.reference_genome)

        self.sample_1 = ExperimentSample.objects.create(
            project=self.project, label='test sample 1')

        # Link the sample to the alignment group.
        self.sample_alignment = ExperimentSampleToAlignment.objects.create(
            alignment_group=self.alignment_group,
            experiment_sample=self.sample_1)

        # Register the unannotated SNP data on the alignment group; the
        # dataset type doubles as its label.
        vcf_type = Dataset.TYPE.VCF_FREEBAYES
        self.vcf_dataset = Dataset.objects.create(
            type=vcf_type,
            label=vcf_type,
            filesystem_location=TEST_UNANNOTATED_VCF)
        self.alignment_group.dataset_set.add(self.vcf_dataset)
    def setUp(self):
        """Build the fixtures: project, GenBank reference genome, alignment
        group, one sample linked to it, and an unannotated VCF dataset.
        """
        test_user = User.objects.create_user(
                'test_username', password='******', email='*****@*****.**')

        project_owner = test_user.get_profile()
        self.project = Project.objects.create(
                title='snpeff test project', owner=project_owner)

        self.reference_genome = import_reference_genome_from_local_file(
                self.project, 'snpeff test ref genome', TEST_GENBANK, 'genbank')

        self.alignment_group = AlignmentGroup.objects.create(
                label='test alignment', reference_genome=self.reference_genome)

        self.sample_1 = ExperimentSample.objects.create(
                project=self.project, label='test sample 1')

        # Relationship between the alignment group and the sample.
        self.sample_alignment = ExperimentSampleToAlignment.objects.create(
                alignment_group=self.alignment_group,
                experiment_sample=self.sample_1)

        # Unannotated SNP data, attached to the alignment group.
        self.vcf_dataset = Dataset.objects.create(
                type=VCF_DATASET_TYPE, label=VCF_DATASET_TYPE,
                filesystem_location=TEST_UNANNOTATED_VCF)
        self.alignment_group.dataset_set.add(self.vcf_dataset)
    def test_run_pipeline__snps_with_effect__no_svs(self):
        """Tests pipeline with SNPs with effect, but no SVs called.

        Runs the pipeline for a single sample, expects the alignment group
        to end up COMPLETED, and checks the gene annotation stored on the
        variant at position 205.
        """
        genome = import_reference_genome_from_local_file(
            self.project, 'mg1655_tolC_through_zupT',
            FullVCFTestSet.TEST_GENBANK, 'genbank')

        sample = ExperimentSample.objects.create(
            project=self.project, label='Sample %d' % 0)

        # Attach the raw reads to the sample.
        copy_and_add_dataset_source(
            sample, Dataset.TYPE.FASTQ1, Dataset.TYPE.FASTQ1,
            FullVCFTestSet.FASTQ1[0])
        copy_and_add_dataset_source(
            sample, Dataset.TYPE.FASTQ2, Dataset.TYPE.FASTQ2,
            FullVCFTestSet.FASTQ2[0])

        pipeline_result = run_pipeline('test_align', genome, [sample])
        created_group = pipeline_result[0]
        alignment_async = pipeline_result[1]
        variant_calling_async = pipeline_result[2]

        # Wait on both async results, alignment first.
        alignment_async.get()
        variant_calling_async.get()

        # Re-fetch the group from the database before checking its status.
        refreshed_group = AlignmentGroup.objects.get(uid=created_group.uid)
        self.assertEqual(
            AlignmentGroup.STATUS.COMPLETED, refreshed_group.status)

        # Check that SnpEff worked.
        variant_at_205 = Variant.objects.get(
            reference_genome=refreshed_group.reference_genome, position=205)
        first_alternate = variant_at_205.variantalternate_set.all()[0]
        self.assertEqual('tolC', first_alternate.data['INFO_EFF_GENE'])
    def setUp(self):
        """Create a project, a reference genome, and two samples: one with
        plain fastq files and one with gzip-named fastq files.
        """
        test_user = User.objects.create_user(
                TEST_USERNAME, password=TEST_PASSWORD, email=TEST_EMAIL)
        self.project = Project.objects.create(
                owner=test_user.get_profile(), title='Test Project')

        self.reference_genome = import_reference_genome_from_local_file(
                self.project, 'ref_genome', TEST_FASTA, 'fasta')

        # Both samples are created before any dataset is attached.
        self.experiment_sample = ExperimentSample.objects.create(
                project=self.project, label='sample1')
        self.compressed_experiment_sample = ExperimentSample.objects.create(
                project=self.project, label='sample1')

        # (sample, dataset type, source file), in attachment order.
        fastq_sources = (
            (self.experiment_sample, Dataset.TYPE.FASTQ1, TEST_FASTQ1),
            (self.experiment_sample, Dataset.TYPE.FASTQ2, TEST_FASTQ2),
            (self.compressed_experiment_sample, Dataset.TYPE.FASTQ1,
                    TEST_FASTQ1_GZ),
            (self.compressed_experiment_sample, Dataset.TYPE.FASTQ2,
                    TEST_FASTQ2_GZ),
        )
        for sample, dataset_type, source_path in fastq_sources:
            copy_and_add_dataset_source(
                    sample, dataset_type, dataset_type, source_path)
Beispiel #16
0
    def setUp(self):
        """Set up a project with a reference genome plus an uncompressed and
        a compressed fastq sample.
        """
        owner_profile = User.objects.create_user(
            TEST_USERNAME, password=TEST_PASSWORD,
            email=TEST_EMAIL).get_profile()
        self.project = Project.objects.create(
            owner=owner_profile, title='Test Project')

        self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_FASTA, 'fasta')

        # Sample for the plain fastq data.
        self.experiment_sample = ExperimentSample.objects.create(
            project=self.project, label='sample1')

        # Sample for the compressed fastq data.
        self.compressed_experiment_sample = ExperimentSample.objects.create(
            project=self.project, label='sample1')

        fastq1_type = Dataset.TYPE.FASTQ1
        fastq2_type = Dataset.TYPE.FASTQ2

        # Plain fastq files go on the first sample.
        copy_and_add_dataset_source(
            self.experiment_sample, fastq1_type, fastq1_type, TEST_FASTQ1)
        copy_and_add_dataset_source(
            self.experiment_sample, fastq2_type, fastq2_type, TEST_FASTQ2)

        # Compressed fastq files go on the second sample.
        copy_and_add_dataset_source(
            self.compressed_experiment_sample, fastq1_type, fastq1_type,
            TEST_FASTQ1_GZ)
        copy_and_add_dataset_source(
            self.compressed_experiment_sample, fastq2_type, fastq2_type,
            TEST_FASTQ2_GZ)
Beispiel #17
0
    def setUp(self):
        """Create a project, import the deletion test reference genome and
        attach the paired fastq files to a new sample.
        """
        test_user = User.objects.create_user(
                'test_username_sv', password='******',
                email='*****@*****.**')

        self.project = Project.objects.create(
                title='test project', owner=test_user.get_profile())

        # Genome with a deletion, taken from the sv testing repo:
        # https://github.com/churchlab/structural-variants-testing
        data_dir = os.path.join(
                settings.PWD, 'test_data', 'sv_testing', 'deletion_bd5a1123')
        ref_fasta = os.path.join(data_dir, 'small_ref.fa')
        reads_1 = os.path.join(data_dir, 'deletion_bd5a1123.1.fq')
        reads_2 = os.path.join(data_dir, 'deletion_bd5a1123.2.fq')

        # Import the data.
        self.reference_genome = import_reference_genome_from_local_file(
                self.project, 'ref_genome', ref_fasta, 'fasta')
        self.experiment_sample = ExperimentSample.objects.create(
                project=self.project, label='sample1')
        copy_and_add_dataset_source(
                self.experiment_sample, Dataset.TYPE.FASTQ1,
                Dataset.TYPE.FASTQ1, reads_1)
        copy_and_add_dataset_source(
                self.experiment_sample, Dataset.TYPE.FASTQ2,
                Dataset.TYPE.FASTQ2, reads_2)
Beispiel #18
0
    def test_compress_dataset(self):
        """
        Make sure that compressing a dataset and putting a new dataset
        entry into the db works correctly.

        The GenBank dataset of a freshly imported reference genome is
        compressed with make_compressed('.gz'); the compressed dataset found
        through the reference genome must compare equal to the one returned
        by make_compressed.
        """
        user = User.objects.create_user(TEST_USERNAME, password=TEST_PASSWORD,
                email=TEST_EMAIL)

        self.test_project = Project.objects.create(
            title=TEST_PROJECT_NAME,
            owner=user.get_profile())

        self.test_ref_genome = import_reference_genome_from_local_file(
            self.test_project,
            TEST_REF_GENOME_NAME,
            TEST_REF_GENOME_PATH,
            'genbank')

        dataset = get_dataset_with_type(self.test_ref_genome,
                type=Dataset.TYPE.REFERENCE_GENOME_GENBANK)

        # All the magic happens here
        compressed_dataset = dataset.make_compressed('.gz')

        # Grab the new compressed dataset through the ref genome to
        # make sure that it got added
        compressed_dataset_through_ref_genome = get_dataset_with_type(
                entity=self.test_ref_genome,
                type=Dataset.TYPE.REFERENCE_GENOME_GENBANK,
                compressed=True)

        # A bare ``assert`` is stripped under ``python -O`` and prints no
        # values on failure; the TestCase assertion has neither problem.
        self.assertEqual(
                compressed_dataset, compressed_dataset_through_ref_genome)
    def test_get_insert_size__generated_data(self):
        """Insert size stats for the inversion test BAM should be a mean of
        about 498 and a stdev of about 1.
        """
        inversion_dir = os.path.join(
                TEST_DATA_DIR, 'sv_testing', 'inversion_5a996d78')
        inversion_ref = os.path.join(inversion_dir, 'small_ref.fa')
        inversion_bam = os.path.join(inversion_dir, 'inversion_5a996d78.bam')
        inversion_sample_uid = 'group'

        ref_genome = import_reference_genome_from_local_file(
                self.project, 'ref_genome', inversion_ref, 'fasta')

        aligned_group = AlignmentGroup.objects.create(
                label='test alignment', reference_genome=ref_genome)

        sample_alignment = create_sample_and_alignment(
                self.project, aligned_group, inversion_sample_uid,
                inversion_bam)['sample_alignment']

        mean, stdev = get_insert_size_mean_and_stdev(sample_alignment)
        self.assertAlmostEqual(mean, 498, delta=2)
        self.assertAlmostEqual(stdev, 1, delta=1)
Beispiel #20
0
    def test_run_lumpy__multiple_samples_of_same_exact_deletion(self):
        """Tests lumpy running on multiple samples.

        Four sample alignments are created in one alignment group, lumpy is
        run once per sample alignment, the per-sample results are merged,
        and exactly 2 variants are expected for the reference genome.
        """
        self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', DELETION_REF, 'fasta')

        # The alignment group is created directly so that only variant
        # calling is exercised.
        self.alignment_group = AlignmentGroup.objects.create(
            label='test alignment', reference_genome=self.reference_genome)

        sample_specs = (
            (DELETION_SAMPLE_1_UID, DELETION_SAMPLE_1_BWA),
            (DELETION_SAMPLE_2_UID, DELETION_SAMPLE_2_BWA),
            (DELETION_SAMPLE_3_UID, DELETION_SAMPLE_3_BWA),
            (DELETION_SAMPLE_4_UID, DELETION_SAMPLE_4_BWA),
        )
        sample_alignments = []
        for sample_uid, bwa_path in sample_specs:
            created = _create_sample_and_alignment(
                self.project, self.alignment_group, sample_uid, bwa_path)
            sample_alignments.append(created['sample_alignment'])

        # One params dict is shared by all runs; only 'tool_kwargs' is
        # replaced before each call.
        lumpy_params = dict(VARIANT_TOOL_PARAMS_MAP[TOOL_LUMPY])
        for sample_alignment in sample_alignments:
            lumpy_params['tool_kwargs'] = {
                'region_num': sample_alignment.uid,
                'sample_alignments': [sample_alignment]
            }
            find_variants_with_tool(
                self.alignment_group, lumpy_params, project=self.project)

        merge_lumpy_vcf(self.alignment_group)

        found_variants = Variant.objects.filter(
            reference_genome=self.reference_genome)

        # Should have 2 events.
        self.assertEqual(2, len(found_variants))
 def setUp(self):
     """Create the test user, project and FASTA reference genome."""
     test_user = User.objects.create_user(
             TEST_USERNAME, password=TEST_PASSWORD, email=TEST_EMAIL)
     owner_profile = test_user.get_profile()
     self.project = Project.objects.create(
             owner=owner_profile, title='Test Project')
     self.reference_genome = import_reference_genome_from_local_file(
             self.project, 'ref_genome', TEST_FASTA, 'fasta')
Beispiel #22
0
    def test_run_lumpy__multiple_samples_of_same_exact_deletion(self):
        """Tests lumpy running on multiple samples.

        Runs lumpy separately for four sample alignments in the same
        alignment group, merges the results and expects 2 variants.
        """
        # Import data.
        self.reference_genome = import_reference_genome_from_local_file(
                self.project, 'ref_genome', DELETION_REF, 'fasta')

        # Create the alignment group directly, so we can focus on testing
        # variant calling only.
        self.alignment_group = AlignmentGroup.objects.create(
                label='test alignment', reference_genome=self.reference_genome)

        uids_and_bams = [
            (DELETION_SAMPLE_1_UID, DELETION_SAMPLE_1_BWA),
            (DELETION_SAMPLE_2_UID, DELETION_SAMPLE_2_BWA),
            (DELETION_SAMPLE_3_UID, DELETION_SAMPLE_3_BWA),
            (DELETION_SAMPLE_4_UID, DELETION_SAMPLE_4_BWA),
        ]
        all_sample_alignments = []
        for uid, bam in uids_and_bams:
            result = _create_sample_and_alignment(
                    self.project, self.alignment_group, uid, bam)
            all_sample_alignments.append(result['sample_alignment'])

        # Common params for each run of lumpy; 'tool_kwargs' is overwritten
        # per sample alignment.
        lumpy_params = dict(VARIANT_TOOL_PARAMS_MAP[TOOL_LUMPY])

        for sa in all_sample_alignments:
            lumpy_params['tool_kwargs'] = {
                'region_num': sa.uid,
                'sample_alignments': [sa]
            }
            find_variants_with_tool(
                    self.alignment_group, lumpy_params, project=self.project)

        merge_lumpy_vcf(self.alignment_group)

        # Grab the resulting variants.
        resulting_variants = Variant.objects.filter(
                reference_genome=self.reference_genome)

        # Should have 2 events.
        self.assertEqual(2, len(resulting_variants))
Beispiel #23
0
    def setUp(self):
        """Create a project, reference genome, two fastq samples and a
        ready, indexed BWA alignment dataset for the first sample.

        Sets self.project, self.reference_genome, self.experiment_sample,
        self.compressed_experiment_sample, self.bwa_dataset and
        self.sample_alignment.
        """
        user = User.objects.create_user(TEST_USERNAME,
                                        password=TEST_PASSWORD,
                                        email=TEST_EMAIL)
        self.project = Project.objects.create(owner=user.get_profile(),
                                              title='Test Project')

        # Create a ref genome.
        self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_FASTA, 'fasta')

        # Create a sample.
        self.experiment_sample = ExperimentSample.objects.create(
            project=self.project, label='sample1')

        # Create a sample for compressed fastq data.
        self.compressed_experiment_sample = ExperimentSample.objects.create(
            project=self.project, label='sample1')

        # Add fastq files to first sample.
        copy_and_add_dataset_source(self.experiment_sample,
                                    Dataset.TYPE.FASTQ1, Dataset.TYPE.FASTQ1,
                                    TEST_FASTQ1)
        copy_and_add_dataset_source(self.experiment_sample,
                                    Dataset.TYPE.FASTQ2, Dataset.TYPE.FASTQ2,
                                    TEST_FASTQ2)

        # Add compressed fastq files to second sample.
        copy_and_add_dataset_source(self.compressed_experiment_sample,
                                    Dataset.TYPE.FASTQ1, Dataset.TYPE.FASTQ1,
                                    TEST_FASTQ1_GZ)
        copy_and_add_dataset_source(self.compressed_experiment_sample,
                                    Dataset.TYPE.FASTQ2, Dataset.TYPE.FASTQ2,
                                    TEST_FASTQ2_GZ)

        # Create a new alignment group.
        alignment_group = AlignmentGroup.objects.create(
            label='test alignment', reference_genome=self.reference_genome)

        # Create the expected models.
        sample_alignment = ExperimentSampleToAlignment.objects.create(
            alignment_group=alignment_group,
            experiment_sample=self.experiment_sample)

        bwa_dataset = copy_and_add_dataset_source(
            sample_alignment,
            dataset_label=Dataset.TYPE.BWA_ALIGN,
            dataset_type=Dataset.TYPE.BWA_ALIGN,
            original_source_location=TEST_DISC_SPLIT_BAM)

        # Mark the alignment dataset as ready. (The original chained
        # assignment also bound an unused local named ``status``.)
        bwa_dataset.status = Dataset.STATUS.READY
        bwa_dataset.save()

        index_bam_file(bwa_dataset.get_absolute_location())

        self.bwa_dataset = bwa_dataset
        self.sample_alignment = sample_alignment
    def setUp(self):
        """Create the project, both reference genomes and the file paths of
        the known-substitutions test data.
        """
        test_user = User.objects.create_user(
                'test_username', password='******', email='*****@*****.**')

        self.project = Project.objects.create(
                title='test project', owner=test_user.get_profile())

        self.reference_genome = import_reference_genome_from_local_file(
                self.project, 'ref_genome', TEST_FASTA, 'fasta')

        self.EXPECTED_NUM_VARIANTS = 30

        # One-indexed positions: the first variant is at 800 and each
        # following one is 20 bases further along.
        self.EXPECTED_VARIANT_POSITIONS = [
                800 + 20 * index
                for index in range(self.EXPECTED_NUM_VARIANTS)]

        root = os.path.join(
                settings.PWD, 'test_data', 'test_genome_known_substitutions')
        self.KNOWN_SUBSTITUTIONS_ROOT = root

        self.TEST_GENOME_FASTA = os.path.join(
                root, 'test_genome_known_substitutions.fa')

        reads_prefix = 'test_genome_known_substitutions_0.snps.simLibrary'
        self.FAKE_READS_FASTQ1 = os.path.join(root, reads_prefix + '.1.fq')
        self.FAKE_READS_FASTQ2 = os.path.join(root, reads_prefix + '.2.fq')

        self.FAKE_READS_SAMPLE_UID = '93b68da4'

        bam_name = 'bwa_align.sorted.grouped.realigned.bam'
        self.FAKE_READS_BAM = os.path.join(root, bam_name)
        self.FAKE_READS_BAM_INDEX = os.path.join(root, bam_name + '.bai')

        # Reference genome built from the known-substitutions FASTA.
        self.REFERENCE_GENOME = import_reference_genome_from_local_file(
                self.project, 'test_genome', self.TEST_GENOME_FASTA, 'fasta')
Beispiel #25
0
 def setUp(self):
     """Import the test FASTA as a reference genome and load its sequence
     record from the stored FASTA dataset.
     """
     self.common_entities = create_common_entities()
     project = self.common_entities['project']
     self.ref_genome = import_reference_genome_from_local_file(
             project, 'ref_genome', TEST_FASTA, 'fasta')

     # Locate the FASTA dataset attached to the imported genome.
     fasta_dataset = self.ref_genome.dataset_set.get(
             type=Dataset.TYPE.REFERENCE_GENOME_FASTA)
     fasta_path = fasta_dataset.get_absolute_location()

     with open(fasta_path) as fasta_fh:
         self.ref_genome_seq_record = SeqIO.read(fasta_fh, 'fasta')
    def test_basic(self):
        """Basic test.

        Creates a variant with alternate value 'G' at positions
        10, 20, ..., 110 of a GenBank reference genome, puts them in one
        variant set, generates a new reference genome from that set, and
        checks that the new sequence has the same length, carries 'G' at
        each of those positions, and is annotated.
        """
        self.reference_genome = import_reference_genome_from_local_file(
                self.project, 'ref_genome', TEST_GENBANK, 'genbank')
        variant_set = VariantSet.objects.create(
                reference_genome=self.reference_genome,
                label='vs1')

        ref_genome_filepath = get_dataset_with_type(self.reference_genome,
                Dataset.TYPE.REFERENCE_GENOME_GENBANK).get_absolute_location()

        with open(ref_genome_filepath) as fh:
            ref_genome_seq_record = SeqIO.read(fh, 'genbank')

        # Positions are one-indexed; the sequence record is zero-indexed,
        # hence the ``position - 1`` lookups below.
        mutated_positions = range(10, 111, 10)

        # The chromosome lookup does not depend on the position, so query it
        # once instead of once per created variant.
        chromosome = Chromosome.objects.get(
                reference_genome=self.reference_genome)

        for position in mutated_positions:
            ref_value = ref_genome_seq_record[position - 1]
            var = Variant.objects.create(
                    type=Variant.TYPE.TRANSITION,
                    reference_genome=self.reference_genome,
                    chromosome=chromosome,
                    position=position,
                    ref_value=ref_value)

            VariantAlternate.objects.create(
                    variant=var, alt_value='G')

            VariantToVariantSet.objects.create(
                    variant=var, variant_set=variant_set)

        new_ref_genome_params = {
            'label': 'new'
        }

        new_ref_genome = generate_new_reference_genome(
                variant_set, new_ref_genome_params)

        new_ref_genome_filepath = get_dataset_with_type(
                        new_ref_genome,
                        Dataset.TYPE.REFERENCE_GENOME_GENBANK)\
                .get_absolute_location()
        with open(new_ref_genome_filepath) as fh:
            new_ref_genome_seq_record = SeqIO.read(fh, 'genbank')

        # Assert size unchanged.
        self.assertEqual(len(new_ref_genome_seq_record),
                len(ref_genome_seq_record))

        # Assert mutations are there.
        for position in mutated_positions:
            self.assertEqual('G', str(new_ref_genome_seq_record[position - 1]))

        # Assert new genome is annotated.
        self.assertTrue(new_ref_genome.is_annotated())
    def setUp(self):
        """Create a project, reference genome, two fastq samples and a
        ready, indexed BWA alignment dataset for the first sample.

        Sets self.project, self.reference_genome, self.experiment_sample,
        self.compressed_experiment_sample, self.bwa_dataset and
        self.sample_alignment.
        """
        user = User.objects.create_user(TEST_USERNAME, password=TEST_PASSWORD,
                email=TEST_EMAIL)
        self.project = Project.objects.create(owner=user.get_profile(),
                title='Test Project')

        # Create a ref genome.
        self.reference_genome = import_reference_genome_from_local_file(
                self.project, 'ref_genome', TEST_FASTA, 'fasta')

        # Create a sample.
        self.experiment_sample = ExperimentSample.objects.create(
                project=self.project, label='sample1')

        # Create a sample for compressed fastq data.
        self.compressed_experiment_sample = ExperimentSample.objects.create(
                project=self.project, label='sample1')

        # Add fastq files to first sample.
        copy_and_add_dataset_source(self.experiment_sample, Dataset.TYPE.FASTQ1,
                Dataset.TYPE.FASTQ1, TEST_FASTQ1)
        copy_and_add_dataset_source(self.experiment_sample, Dataset.TYPE.FASTQ2,
                Dataset.TYPE.FASTQ2, TEST_FASTQ2)

        # Add compressed fastq files to second sample.
        copy_and_add_dataset_source(self.compressed_experiment_sample,
                Dataset.TYPE.FASTQ1, Dataset.TYPE.FASTQ1, TEST_FASTQ1_GZ)
        copy_and_add_dataset_source(self.compressed_experiment_sample,
                Dataset.TYPE.FASTQ2, Dataset.TYPE.FASTQ2, TEST_FASTQ2_GZ)

        # Create a new alignment group.
        alignment_group = AlignmentGroup.objects.create(
                label='test alignment', reference_genome=self.reference_genome)

        # Create the expected models.
        sample_alignment = ExperimentSampleToAlignment.objects.create(
                alignment_group=alignment_group,
                experiment_sample=self.experiment_sample)

        bwa_dataset = copy_and_add_dataset_source(
                sample_alignment,
                dataset_label=Dataset.TYPE.BWA_ALIGN,
                dataset_type=Dataset.TYPE.BWA_ALIGN,
                original_source_location=TEST_DISC_SPLIT_BAM)

        # Mark the alignment dataset as ready. (The original chained
        # assignment also bound an unused local named ``status``.)
        bwa_dataset.status = Dataset.STATUS.READY
        bwa_dataset.save()

        index_bam_file(bwa_dataset.get_absolute_location())

        self.bwa_dataset = bwa_dataset
        self.sample_alignment = sample_alignment
Beispiel #28
0
    def setUp(self):
        """Create a test user, a project owned by that user, and a FASTA
        reference genome in the project.
        """
        test_user = User.objects.create_user(
                'test_username', password='******', email='*****@*****.**')

        owner_profile = test_user.get_profile()
        self.project = Project.objects.create(
                title='test project', owner=owner_profile)

        self.reference_genome = import_reference_genome_from_local_file(
                self.project, 'ref_genome', TEST_FASTA, 'fasta')
def create_ref_genome_from_server_location(request):
    """Handle request to create ReferenceGenome from a file location.

    Reads 'projectUid', 'refGenomeLabel', 'refGenomeFileLocation' and
    'importFileFormat' from request.POST. The Project is looked up by uid
    and by the requesting user's profile (404 if not found).

    Returns an HttpResponse whose JSON body is {'error': <string>}; the
    string is empty when the import raised nothing.
    """
    owner = request.user.get_profile()
    project = get_object_or_404(
            Project, owner=owner, uid=request.POST['projectUid'])

    result = {'error': ''}
    try:
        import_reference_genome_from_local_file(
                project,
                request.POST['refGenomeLabel'],
                request.POST['refGenomeFileLocation'],
                request.POST['importFileFormat'])
    except Exception as e:
        # Any failure during import (including a missing POST key) is
        # reported back to the client instead of being raised.
        result['error'] = str(e)

    payload = json.dumps(result)
    return HttpResponse(payload, content_type='application/json')
Beispiel #30
0
    def test_run_lumpy__deletion(self):
        """Tests that Lumpy calls the single expected deletion and that the
        resulting variant carries SV type and SnpEff gene data.
        """
        # Import the reference genome used by the deletion test data.
        self.reference_genome = import_reference_genome_from_local_file(
                self.project, 'ref_genome', DELETION_REF_GENBANK, 'genbank')

        # The alignment is created already complete so that only variant
        # calling is exercised here.
        self.alignment_group = AlignmentGroup.objects.create(
                label='test alignment', reference_genome=self.reference_genome)

        sample_alignment = _create_sample_and_alignment(
                self.project, self.alignment_group, DELETION_SAMPLE_1_UID,
                DELETION_SAMPLE_1_BWA)['sample_alignment']

        # Run lumpy on a copy of the default tool params.
        lumpy_params = dict(VARIANT_TOOL_PARAMS_MAP[TOOL_LUMPY])
        lumpy_params['tool_kwargs'] = {
            'region_num': sample_alignment.uid,
            'sample_alignments': [sample_alignment]
        }
        find_variants_with_tool(
                self.alignment_group, lumpy_params, project=self.project)
        merge_lumpy_vcf(self.alignment_group)

        called_variants = Variant.objects.filter(
                reference_genome=self.reference_genome)

        # Exactly one variant is expected: a deletion around position 10000
        # of size 1000.
        self.assertEqual(1, len(called_variants))
        deletion = called_variants[0]

        # Position and ref value.
        self.assertTrue(9950 < deletion.position < 10050)
        self.assertEqual(SV_REF_VALUE, deletion.ref_value)

        # Size, derived from the INFO_END field of the common data.
        common_data = deletion.variantcallercommondata_set.all()[0]
        deletion_size = common_data.data['INFO_END'] - deletion.position
        self.assertTrue(900 < deletion_size < 1100)

        # SV type and SnpEff gene annotation live on the alternate.
        alternate = deletion.variantalternate_set.all()[0]
        self.assertEqual('DEL', alternate.data['INFO_SVTYPE'])
        self.assertEqual('geneX', alternate.data['INFO_EFF_GENE'])
    def test_basic(self):
        """Tests generating a new reference genome from a VariantSet.

        Creates one Variant with alt value 'G' at each of positions
        10, 20, ..., 110, generates a new reference genome from the set, and
        checks that the new genbank record has the same length, carries the
        mutations, and is annotated.
        """
        # 1-based positions that get mutated; reused for the assertions.
        positions = range(10, 111, 10)

        self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_GENBANK, 'genbank')
        variant_set = VariantSet.objects.create(
            reference_genome=self.reference_genome, label='vs1')

        ref_genome_filepath = get_dataset_with_type(
            self.reference_genome,
            Dataset.TYPE.REFERENCE_GENOME_GENBANK).get_absolute_location()

        with open(ref_genome_filepath) as fh:
            ref_genome_seq_record = SeqIO.read(fh, 'genbank')

        # The chromosome lookup does not depend on the position, so query it
        # once instead of once per created Variant.
        chromosome = Chromosome.objects.get(
            reference_genome=self.reference_genome)

        for position in positions:
            # Positions are 1-based; the sequence record is 0-indexed.
            ref_value = ref_genome_seq_record[position - 1]
            var = Variant.objects.create(
                type=Variant.TYPE.TRANSITION,
                reference_genome=self.reference_genome,
                chromosome=chromosome,
                position=position,
                ref_value=ref_value)

            VariantAlternate.objects.create(variant=var, alt_value='G')

            VariantToVariantSet.objects.create(variant=var,
                                               variant_set=variant_set)

        new_ref_genome_params = {'label': 'new'}

        new_ref_genome = generate_new_reference_genome(variant_set,
                                                       new_ref_genome_params)

        new_ref_genome_filepath = get_dataset_with_type(
            new_ref_genome,
            Dataset.TYPE.REFERENCE_GENOME_GENBANK).get_absolute_location()
        with open(new_ref_genome_filepath) as fh:
            new_ref_genome_seq_record = SeqIO.read(fh, 'genbank')

        # Assert size unchanged.
        self.assertEqual(len(new_ref_genome_seq_record),
                         len(ref_genome_seq_record))

        # Assert mutations are there.
        for position in positions:
            self.assertEqual('G', str(new_ref_genome_seq_record[position - 1]))

        # Assert new genome is annotated.
        self.assertTrue(new_ref_genome.is_annotated())
Beispiel #32
0
    def test_generate_genbank_mobile_element_multifasta(self):
        """Test generation of the mobile element fasta.

        Imports a genbank reference genome, asks it to ensure its mobile
        element multifasta exists, and checks that the resulting dataset's
        file is present on disk.
        """
        self.reference_genome = import_reference_genome_from_local_file(
                self.project, 'ref_genome', TEST_GENBANK, 'genbank')
        self.reference_genome.ensure_mobile_element_multifasta()

        me_fa_dataset = get_dataset_with_type(
                self.reference_genome,
                Dataset.TYPE.MOBILE_ELEMENT_FASTA)

        # Use a unittest assertion rather than a bare `assert`, which is
        # stripped when Python runs with -O and would make the test vacuous.
        self.assertTrue(os.path.exists(
                me_fa_dataset.get_absolute_location()))
Beispiel #33
0
 def test_run_pipeline__bad_alignment(self):
     """Running the pipeline on reads that do not belong to the reference
     genome (e.g. a user picked the wrong genome) is expected to raise.
     """
     mismatched_genome = import_reference_genome_from_local_file(
             self.project, 'concat_mg1655_partials',
             FullVCFTestSet.TEST_CONCAT_GENBANK, 'genbank')
     samples = [self.experiment_sample]

     # NOTE: Ideally there would be a better way to test this.
     # In general, we need to figure out how to better communicate the reason
     # for a failed alignment to the user.
     with self.assertRaises(Exception):
         run_pipeline('name_placeholder', mismatched_genome, samples)
 def test_run_pipeline__bad_alignment(self):
     """Checks that the pipeline raises when the sample's reads are aligned
     against a reference genome they do not match.
     """
     wrong_ref_genome = import_reference_genome_from_local_file(
             self.project,
             'concat_mg1655_partials',
             FullVCFTestSet.TEST_CONCAT_GENBANK,
             'genbank')
     samples_to_align = [self.experiment_sample]

     # NOTE: Ideally there would be a better way to test this.
     # In general, we need to figure out how to better communicate the reason
     # for a failed alignment to the user.
     with self.assertRaises(Exception):
         run_pipeline('name_placeholder', wrong_ref_genome, samples_to_align)
    def setUp(self):
        """Builds the shared fixtures: a user, a project owned by that user's
        profile, and a reference genome imported from TEST_FASTA.
        """
        test_user = User.objects.create_user(
            'test_username', password='******', email='*****@*****.**')
        owner_profile = test_user.get_profile()

        # Project for the tests.
        self.project = Project.objects.create(
            title='test project', owner=owner_profile)

        # Reference genome for the tests.
        self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_FASTA, 'fasta')
Beispiel #36
0
    def test_run_lumpy__deletion(self):
        """Tests that Lumpy finds the one expected deletion in the test data.
        """
        # Import the reference genome used by the deletion test data.
        self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', DELETION_REF_GENBANK, 'genbank')

        # The alignment is created already complete so that only variant
        # calling is exercised here.
        self.alignment_group = AlignmentGroup.objects.create(
            label='test alignment', reference_genome=self.reference_genome)

        created = _create_sample_and_alignment(
            self.project, self.alignment_group,
            DELETION_SAMPLE_1_UID, DELETION_SAMPLE_1_BWA)
        sample_alignment = created['sample_alignment']

        # Run lumpy on a copy of the default tool params.
        lumpy_params = dict(VARIANT_TOOL_PARAMS_MAP[TOOL_LUMPY])
        lumpy_params['tool_kwargs'] = {
            'region_num': sample_alignment.uid,
            'sample_alignments': [sample_alignment]
        }
        find_variants_with_tool(
            self.alignment_group, lumpy_params, project=self.project)
        merge_lumpy_vcf(self.alignment_group)

        called_variants = Variant.objects.filter(
            reference_genome=self.reference_genome)

        # Exactly one variant is expected: a deletion around position 10000
        # of size 1000.
        self.assertEqual(1, len(called_variants))
        deletion = called_variants[0]

        # Position and ref value.
        self.assertTrue(9950 < deletion.position < 10050)
        self.assertEqual(SV_REF_VALUE, deletion.ref_value)

        # Size, derived from the INFO_END field of the common data.
        common_data = deletion.variantcallercommondata_set.all()[0]
        deletion_size = common_data.data['INFO_END'] - deletion.position
        self.assertTrue(900 < deletion_size < 1100)

        # SV type is stored on the alternate.
        alternate = deletion.variantalternate_set.all()[0]
        self.assertEqual('DEL', alternate.data['INFO_SVTYPE'])
Beispiel #37
0
    def setUp(self):
        """Creates the project, a FASTA reference genome, and one sample with
        both FASTQ datasets attached.
        """
        self.project = create_common_entities()['project']
        self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_FASTA, 'fasta')

        self.experiment_sample = ExperimentSample.objects.create(
            project=self.project, label='sample1')

        # Each dataset uses its type as its label as well.
        fastq_sources = (
            (Dataset.TYPE.FASTQ1, TEST_FASTQ1),
            (Dataset.TYPE.FASTQ2, TEST_FASTQ2),
        )
        for dataset_type, source_path in fastq_sources:
            copy_and_add_dataset_source(
                self.experiment_sample, dataset_type, dataset_type,
                source_path)
    def _perform_assembly(self, data_dict):
        """Aligns a sample to a reference, runs de novo assembly, and returns
        the resulting contigs.

        Args:
            data_dict: Mapping with keys 'ref_fasta', 'fq_1' and 'fq_2'
                giving the file locations to use.

        Returns:
            The Contig queryset filtered to the imported reference genome
            and the sample's alignment.
        """
        reference_fasta = data_dict['ref_fasta']
        fastq_1_location = data_dict['fq_1']
        fastq_2_location = data_dict['fq_2']

        # Import the reference genome without moving the source file.
        ref_genome = import_reference_genome_from_local_file(
                self.project, 'test_ref', reference_fasta, 'fasta',
                move=False)

        sample = ExperimentSample.objects.create(
                project=self.project, label='test_sample')

        # Attach both fastq datasets; each uses its type as its label.
        fastq_datasets = (
            (Dataset.TYPE.FASTQ1, fastq_1_location),
            (Dataset.TYPE.FASTQ2, fastq_2_location),
        )
        for dataset_type, location in fastq_datasets:
            add_dataset_to_entity(
                    sample, dataset_type, dataset_type,
                    filesystem_location=location)

        # Align the sample to the reference, skipping variant calling.
        alignment_group, _, _ = run_pipeline(
                'test_alignment', ref_genome, [sample],
                perform_variant_calling=False, alignment_options={})

        sample_align = ExperimentSampleToAlignment.objects.get(
                alignment_group=alignment_group,
                experiment_sample=sample)

        # Kick off the assembly and block until its result is available.
        run_de_novo_assembly_pipeline([sample_align]).get()

        return Contig.objects.filter(
                parent_reference_genome=ref_genome,
                experiment_sample_to_alignment=sample_align)
    def test_run_pipeline__multiple_chromosomes(self):
        """Checks that alignment and variant calling complete, and produce
        annotated variants, for a reference genome imported from the
        concatenated genbank test file.
        """
        ref_genome = import_reference_genome_from_local_file(
                self.project, 'concat_mg1655_partials',
                FullVCFTestSet.TEST_CONCAT_GENBANK, 'genbank')

        sample_obj = ExperimentSample.objects.create(
                project=self.project, label='Sample 0')

        # Add raw reads to the sample; each dataset is labelled by its type.
        raw_reads = (
            (Dataset.TYPE.FASTQ1, FullVCFTestSet.FASTQ1[0]),
            (Dataset.TYPE.FASTQ2, FullVCFTestSet.FASTQ2[0]),
        )
        for dataset_type, reads_path in raw_reads:
            copy_and_add_dataset_source(
                    sample_obj, dataset_type, dataset_type, reads_path)

        pipeline_result = run_pipeline(
                'name_placeholder', ref_genome, [sample_obj])
        alignment_group = pipeline_result[0]
        alignment_async_result = pipeline_result[1]
        variant_calling_async_result = pipeline_result[2]

        # Wait for both stages before inspecting the database.
        alignment_async_result.get()
        variant_calling_async_result.get()

        # Re-fetch the group so the status read below is the stored one.
        alignment_group = AlignmentGroup.objects.get(uid=alignment_group.uid)
        self.assertEqual(AlignmentGroup.STATUS.COMPLETED,
                alignment_group.status)

        # Validate that the expected variants were called.
        # TODO: Add Chromosome checking.
        expected_genes = ((515, 'ygiB'), (205, 'tolC'))
        for position, gene in expected_genes:
            variant = Variant.objects.get(
                    reference_genome=alignment_group.reference_genome,
                    position=position)
            alternate = variant.variantalternate_set.all()[0]
            self.assertEqual(gene, alternate.data['INFO_EFF_GENE'])
Beispiel #40
0
    def test_parser__sv_lumpy(self):
        """Tests parsing a lumpy vcf and checks that the variants at the two
        expected positions are flagged as SVs.
        """
        deletion_data_dir = os.path.join(
                TEST_DATA_DIR, 'sv_testing', 'deletion_bd5a1123')
        deletion_ref_fasta = os.path.join(deletion_data_dir, 'small_ref.fa')

        # Sample uids; these must correspond to the sample ids in the vcf.
        sample_uids = ('ds1', 'ds2', 'ds3', 'f8346a99')

        reference_genome = import_reference_genome_from_local_file(
                self.project, 'ref_genome', deletion_ref_fasta, 'fasta')

        alignment_group = AlignmentGroup.objects.create(
                label='Alignment 1',
                reference_genome=reference_genome,
                aligner=AlignmentGroup.ALIGNER.BWA)

        # Connect lumpy vcf as Dataset.
        lumpy_vcf_dataset = copy_and_add_dataset_source(
                alignment_group, Dataset.TYPE.VCF_LUMPY,
                Dataset.TYPE.VCF_LUMPY, LUMPY_4_SAMPLES_2_DELETIONS_VCF)

        for sample_uid in sample_uids:
            create_sample_and_alignment(
                    self.project, alignment_group, sample_uid)

        # Everything needed to parse the vcf is now in place.
        parse_vcf(lumpy_vcf_dataset, alignment_group)

        # Check expected variants.
        for position in (4998, 9999):
            variant = Variant.objects.get(
                    reference_genome=reference_genome, position=position)
            common_data = variant.variantcallercommondata_set.all()[0]
            self.assertTrue(common_data.data['IS_SV'])
def main():
    """Bootstrap an example project and parse a large SnpEff vcf into it.

    Creates a Project with the MG1655 reference genome, an AlignmentGroup
    with LARGE_VCF attached as its VCF_FREEBAYES_SNPEFF dataset, and
    ExperimentSamples restored from a pickle file, then parses the vcf.
    """
    # Create a User and Project.
    user = get_or_create_user()
    test_project = Project.objects.create(title=EXAMPLE_PROJECT_NAME,
                                          owner=user.get_profile())
    ref_genome = import_reference_genome_from_local_file(test_project,
                                                         'mg1655',
                                                         MG1655_REF_GENOME,
                                                         'genbank',
                                                         move=False)

    # Create alignment group and relate the vcf Dataset to it.
    alignment_group = AlignmentGroup.objects.create(
        label='Fix Recoli Alignment',
        reference_genome=ref_genome,
        aligner=AlignmentGroup.ALIGNER.BWA)
    vcf_output_path = get_snpeff_vcf_output_path(alignment_group,
                                                 Dataset.TYPE.BWA_ALIGN)
    shutil.copy(LARGE_VCF, vcf_output_path)
    dataset = Dataset.objects.create(
        type=Dataset.TYPE.VCF_FREEBAYES_SNPEFF,
        label=Dataset.TYPE.VCF_FREEBAYES_SNPEFF,
        filesystem_location=clean_filesystem_location(vcf_output_path),
    )
    alignment_group.dataset_set.add(dataset)

    # Import ExperimentSample objects, setting specific uid to match
    # the vcf file. Pickle data is a byte stream, so the file is opened in
    # binary mode; text mode can corrupt it on some platforms and fails
    # outright on Python 3.
    with open(EXPERIMENT_SAMPLE_MODEL_DATA_PICKLE, 'rb') as sample_data_fh:
        es_data = pickle.load(sample_data_fh)
        for es in es_data:
            es_obj = ExperimentSample.objects.create(uid=es.uid,
                                                     project=test_project,
                                                     label=es.label)

            es_obj.data.update({
                'group': es.group,
                'well': es.well,
                'num_reads': es.num_reads
            })
            es_obj.save()

    parse_alignment_group_vcf(alignment_group,
                              Dataset.TYPE.VCF_FREEBAYES_SNPEFF)
Beispiel #42
0
    def test_run_lumpy__inversion(self):
        """Tests that Lumpy calls exactly one variant on the inversion data,
        near position 30000 and of size about 1000.
        """
        # Import the reference genome used by the inversion test data.
        self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', INVERSION_REF, 'fasta')

        # The alignment is created already complete so that only variant
        # calling is exercised here.
        self.alignment_group = AlignmentGroup.objects.create(
            label='test alignment', reference_genome=self.reference_genome)

        created = _create_sample_and_alignment(
            self.project, self.alignment_group,
            INVERSION_SAMPLE_UID, INVERSION_SAMPLE_BWA)
        sample_alignment = created['sample_alignment']

        # Run lumpy on a copy of the default tool params.
        lumpy_params = dict(VARIANT_TOOL_PARAMS_MAP[TOOL_LUMPY])
        lumpy_params['tool_kwargs'] = {
            'region_num': sample_alignment.uid,
            'sample_alignments': [sample_alignment]
        }
        find_variants_with_tool(
            self.alignment_group, lumpy_params, project=self.project)
        merge_lumpy_vcf(self.alignment_group)

        called_variants = Variant.objects.filter(
            reference_genome=self.reference_genome)
        self.assertEqual(1, len(called_variants))
        inversion = called_variants[0]

        # Position.
        self.assertAlmostEqual(inversion.position, 30000, delta=2)

        # Size, derived from the INFO_END field of the common data.
        common_data = inversion.variantcallercommondata_set.all()[0]
        inversion_size = common_data.data['INFO_END'] - inversion.position
        self.assertAlmostEqual(inversion_size, 1000, delta=10)
def main():
    """Bootstrap an example project and parse a large SnpEff vcf into it.

    Creates a Project with the MG1655 reference genome, an AlignmentGroup
    with LARGE_VCF attached as its VCF_FREEBAYES_SNPEFF dataset, and
    ExperimentSamples restored from a pickle file, then parses the vcf.
    """
    # Create a User and Project.
    user = get_or_create_user()
    test_project = Project.objects.create(
            title=EXAMPLE_PROJECT_NAME, owner=user.get_profile())
    ref_genome = import_reference_genome_from_local_file(test_project,
            'mg1655', MG1655_REF_GENOME, 'genbank', move=False)

    # Create alignment group and relate the vcf Dataset to it.
    alignment_group = AlignmentGroup.objects.create(
            label='Fix Recoli Alignment',
            reference_genome=ref_genome,
            aligner=AlignmentGroup.ALIGNER.BWA)
    vcf_output_path = get_snpeff_vcf_output_path(alignment_group,
            Dataset.TYPE.BWA_ALIGN)
    shutil.copy(LARGE_VCF, vcf_output_path)
    dataset = Dataset.objects.create(
            type=Dataset.TYPE.VCF_FREEBAYES_SNPEFF,
            label=Dataset.TYPE.VCF_FREEBAYES_SNPEFF,
            filesystem_location=clean_filesystem_location(vcf_output_path),
    )
    alignment_group.dataset_set.add(dataset)

    # Import ExperimentSample objects, setting specific uid to match
    # the vcf file. Pickle data is a byte stream, so the file is opened in
    # binary mode; text mode can corrupt it on some platforms and fails
    # outright on Python 3.
    with open(EXPERIMENT_SAMPLE_MODEL_DATA_PICKLE, 'rb') as sample_data_fh:
        es_data = pickle.load(sample_data_fh)
        for es in es_data:
            es_obj = ExperimentSample.objects.create(
                uid=es.uid,
                project=test_project,
                label=es.label
            )

            es_obj.data.update({
                'group': es.group,
                'well': es.well,
                'num_reads': es.num_reads,
            })
            es_obj.save()

    parse_alignment_group_vcf(alignment_group,
            Dataset.TYPE.VCF_FREEBAYES_SNPEFF)
Beispiel #44
0
    def test_run_lumpy__inversion(self):
        """Tests that running Lumpy on the inversion data yields a single
        variant near position 30000 with a size of about 1000.
        """
        # Import the reference genome used by the inversion test data.
        self.reference_genome = import_reference_genome_from_local_file(
                self.project, 'ref_genome', INVERSION_REF, 'fasta')

        # The alignment is created already complete so that only variant
        # calling is exercised here.
        self.alignment_group = AlignmentGroup.objects.create(
                label='test alignment', reference_genome=self.reference_genome)

        sample_alignment = _create_sample_and_alignment(
                self.project, self.alignment_group, INVERSION_SAMPLE_UID,
                INVERSION_SAMPLE_BWA)['sample_alignment']

        # Run lumpy on a copy of the default tool params.
        lumpy_params = dict(VARIANT_TOOL_PARAMS_MAP[TOOL_LUMPY])
        lumpy_params['tool_kwargs'] = {
            'region_num': sample_alignment.uid,
            'sample_alignments': [sample_alignment]
        }
        find_variants_with_tool(
                self.alignment_group,
                lumpy_params,
                project=self.project)
        merge_lumpy_vcf(self.alignment_group)

        called_variants = Variant.objects.filter(
                reference_genome=self.reference_genome)
        self.assertEqual(1, len(called_variants))
        inversion = called_variants[0]

        # Position.
        self.assertAlmostEqual(inversion.position, 30000, delta=2)

        # Size, derived from the INFO_END field of the common data.
        common_data = inversion.variantcallercommondata_set.all()[0]
        inversion_size = common_data.data['INFO_END'] - inversion.position
        self.assertAlmostEqual(inversion_size, 1000, delta=10)
def sv_testing_bootstrap(project):
    """Populate `project` with the 'all_svs' test reference genome and one
    sample carrying both FASTQ datasets.

    The pipeline is only run when '--sv' is present in sys.argv.
    """
    data_dir = os.path.join(GD_ROOT, 'test_data', 'sv_testing', 'all_svs')
    ref_fasta_path = os.path.join(data_dir, 'ref.fa')
    fastq_sources = (
        (Dataset.TYPE.FASTQ1, os.path.join(data_dir, 'simLibrary.1.fq')),
        (Dataset.TYPE.FASTQ2, os.path.join(data_dir, 'simLibrary.2.fq')),
    )

    ref_genome = import_reference_genome_from_local_file(
            project, 'ref', ref_fasta_path, 'fasta')

    sample = ExperimentSample.objects.create(
            project=project, label='simLibrary')

    # Each dataset uses its type as its label as well.
    for dataset_type, fastq_path in fastq_sources:
        copy_and_add_dataset_source(
                sample, dataset_type, dataset_type, fastq_path)

    # Passing --sv on the command line runs the pipeline for the SV project.
    if '--sv' in sys.argv:
        run_pipeline('sample_alignment_ref', ref_genome, [sample])
Beispiel #46
0
    def test_dataset_strings(self):
        """Tests that a Dataset's internal string is the owning entity's uid
        joined by '_' to the uppercase-underscore form of the dataset type.
        """
        user = User.objects.create_user(TEST_USERNAME,
                                        password=TEST_PASSWORD,
                                        email=TEST_EMAIL)

        self.test_project = Project.objects.create(title=TEST_PROJECT_NAME,
                                                   owner=user.get_profile())

        self.test_ref_genome = import_reference_genome_from_local_file(
            self.test_project, TEST_REF_GENOME_NAME, TEST_REF_GENOME_PATH,
            'genbank')

        dataset = get_dataset_with_type(
            self.test_ref_genome, type=Dataset.TYPE.REFERENCE_GENOME_GENBANK)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(
            dataset.internal_string(self.test_ref_genome),
            (str(self.test_ref_genome.uid) + '_' +
             uppercase_underscore(Dataset.TYPE.REFERENCE_GENOME_GENBANK)))
Beispiel #47
0
    def _generate_test_instance(self, rg_files, rg_names=None):
        """Imports the given reference genome files, concatenates them via
        the ref_genomes_concatenate view, and checks the resulting genome.

        Args:
            rg_files: Paths of the reference genome files. Paths ending in
                '.fa' are imported as fasta, all others as genbank.
            rg_names: Optional labels, one per file. Defaults to the string
                form of each file's index.
        """
        if rg_names is None:
            rg_names = list(map(str, range(len(rg_files))))

        project = self.common_entities['project']

        ref_genomes = []
        for idx, rg_file in enumerate(rg_files):
            if rg_file.endswith('.fa'):
                file_type = 'fasta'
            else:
                file_type = 'genbank'
            imported = import_reference_genome_from_local_file(
                project, rg_names[idx], rg_file, file_type, move=False)
            ref_genomes.append(imported)

        test_label = 'concat_test'
        request_data = {
            'newGenomeLabel': test_label,
            'refGenomeUidList': [genome.uid for genome in ref_genomes]
        }

        # Build the POST request by hand instead of going through a client.
        request = HttpRequest()
        request.POST = {'data': json.dumps(request_data)}
        request.method = 'POST'
        request.user = self.common_entities['user']

        authenticate(username=TEST_USERNAME, password=TEST_PASSWORD)
        self.assertTrue(request.user.is_authenticated())

        ref_genomes_concatenate(request)

        concat_ref = ReferenceGenome.objects.get(label=test_label)

        # The concatenated genome must have as many chromosomes as its
        # sources combined.
        expected_chromosomes = sum(
            genome.num_chromosomes for genome in ref_genomes)
        self.assertEqual(concat_ref.num_chromosomes, expected_chromosomes)

        # The same holds for the number of bases.
        expected_bases = sum(genome.num_bases for genome in ref_genomes)
        self.assertEqual(concat_ref.num_bases, expected_bases)
Beispiel #48
0
def sv_testing_bootstrap(project):
    """Populate `project` with the SV test reference genome and sample.

    Imports test_data/sv_testing/all_svs/ref.fa (under GD_ROOT) as a fasta
    reference genome labelled 'ref', creates an ExperimentSample labelled
    'simLibrary', and attaches the two simLibrary fastq files to it. The
    'sample_alignment_ref' pipeline is only run when '--sv' is present in
    sys.argv.
    """
    data_dir = os.path.join(GD_ROOT, 'test_data', 'sv_testing', 'all_svs')

    reference = import_reference_genome_from_local_file(
        project, 'ref', os.path.join(data_dir, 'ref.fa'), 'fasta')

    library = ExperimentSample.objects.create(
        project=project, label='simLibrary')

    # Attach the reads in order: FASTQ1 first, then FASTQ2.
    reads = (
        (Dataset.TYPE.FASTQ1, 'simLibrary.1.fq'),
        (Dataset.TYPE.FASTQ2, 'simLibrary.2.fq'),
    )
    for dataset_type, reads_name in reads:
        copy_and_add_dataset_source(
            library, dataset_type, dataset_type,
            os.path.join(data_dir, reads_name))

    # using --sv argument runs pipeline for SV project
    if '--sv' in sys.argv:
        run_pipeline('sample_alignment_ref', reference, [library])
        def _generate_test_instance(rg_files, rg_names=None):
            """Import the given files, concatenate them, and verify the totals.

            Nested helper: it reads `self` from the enclosing scope.

            Each entry of rg_files is imported as a reference genome
            ('fasta' when the path ends in '.fa', 'genbank' otherwise) with
            the label at the same index of rg_names; when rg_names is None
            the labels default to '0', '1', ... . The imported genomes are
            submitted to ref_genomes_concatenate(), the resulting genome is
            checked for the summed chromosome and base counts, and it is
            deleted afterwards.
            """
            if rg_names is None:
                rg_names = [str(i) for i in range(len(rg_files))]

            project = self.common_entities['project']
            ref_genomes = []
            for i, rg_file in enumerate(rg_files):
                file_type = 'fasta' if rg_file.endswith('.fa') else 'genbank'
                ref_genomes.append(import_reference_genome_from_local_file(
                    project, rg_names[i], rg_file, file_type, move=False))

            test_label = 'concat_test'
            request_data = {
                'newGenomeLabel': test_label,
                'refGenomeUidList': [rg.uid for rg in ref_genomes]
            }

            request = HttpRequest()
            request.POST = {'data': json.dumps(request_data)}
            request.method = 'POST'
            request.user = self.common_entities['user']

            authenticate(username=TEST_USERNAME, password=TEST_PASSWORD)
            self.assertTrue(request.user.is_authenticated())

            # The response is not inspected; the new genome is looked up by
            # its label instead.
            ref_genomes_concatenate(request)

            concat_ref = ReferenceGenome.objects.get(label=test_label)

            # Assert correct number of chromosomes
            self.assertEqual(
                concat_ref.num_chromosomes,
                sum(rg.num_chromosomes for rg in ref_genomes))

            # Assert correct number of bases
            self.assertEqual(
                concat_ref.num_bases,
                sum(rg.num_bases for rg in ref_genomes))

            # Delete Reference Genome
            # NOTE(review): presumably so a later call can reuse the
            # 'concat_test' label -- confirm against the enclosing test.
            concat_ref.delete()
Beispiel #50
0
    def test_dataset_strings(self):
        """Checks Dataset.internal_string() for an imported genbank genome.

        Creates a user, a project and a genbank reference genome, then
        asserts that the genbank dataset's internal string equals the
        reference genome uid, an underscore, and uppercase_underscore() of
        the genbank dataset type.
        """
        user = User.objects.create_user(TEST_USERNAME, password=TEST_PASSWORD,
                email=TEST_EMAIL)

        self.test_project = Project.objects.create(
            title=TEST_PROJECT_NAME,
            owner=user.get_profile())

        self.test_ref_genome = import_reference_genome_from_local_file(
            self.test_project,
            TEST_REF_GENOME_NAME,
            TEST_REF_GENOME_PATH,
            'genbank')

        dataset = get_dataset_with_type(self.test_ref_genome,
                type=Dataset.TYPE.REFERENCE_GENOME_GENBANK)

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(
                dataset.internal_string(self.test_ref_genome),
                (str(self.test_ref_genome.uid) + '_' +
                 uppercase_underscore(Dataset.TYPE.REFERENCE_GENOME_GENBANK)))
Beispiel #51
0
    def test_run_pipeline__snps_with_effect__no_svs(self):
        """Tests pipeline with SNPs with effect, but no SVs called.

        Imports the genbank test reference, attaches the first FASTQ1/FASTQ2
        pair to a fresh sample, runs the 'test_align' pipeline, and checks
        that the alignment group completes and that the variant at position
        205 is annotated with gene 'tolC'.
        """
        genome = import_reference_genome_from_local_file(
            self.project, 'mg1655_tolC_through_zupT',
            FullVCFTestSet.TEST_GENBANK, 'genbank')

        sample = ExperimentSample.objects.create(
            project=self.project, label='Sample %d' % 0)

        # Add raw reads to the sample.
        copy_and_add_dataset_source(
            sample, Dataset.TYPE.FASTQ1, Dataset.TYPE.FASTQ1,
            FullVCFTestSet.FASTQ1[0])
        copy_and_add_dataset_source(
            sample, Dataset.TYPE.FASTQ2, Dataset.TYPE.FASTQ2,
            FullVCFTestSet.FASTQ2[0])

        pipeline_result = run_pipeline('test_align', genome, [sample])

        # The first three entries are the alignment group, the alignment
        # result and the variant calling result, in that order.
        initial_group, align_result, calling_result = (
            pipeline_result[0], pipeline_result[1], pipeline_result[2])

        # Call get() on the alignment result first, then on variant calling.
        align_result.get()
        calling_result.get()

        # Re-fetch the alignment group before inspecting its status.
        refreshed_group = AlignmentGroup.objects.get(uid=initial_group.uid)
        self.assertEqual(
            AlignmentGroup.STATUS.COMPLETED, refreshed_group.status)

        # Check that SnpEff worked.
        variant_at_205 = Variant.objects.get(
            reference_genome=refreshed_group.reference_genome,
            position=205)
        first_alternate = variant_at_205.variantalternate_set.all()[0]
        self.assertEqual('tolC', first_alternate.data['INFO_EFF_GENE'])
Beispiel #52
0
    def setUp(self):
        """Override.

        Creates a user and a project owned by that user's profile, a bare
        ReferenceGenome labelled 'boom' with one 9001-base Chromosome, and
        a second reference genome imported from the genbank test file.
        """
        owner_user = User.objects.create_user(
            TEST_USERNAME, password=TEST_PASSWORD, email=TEST_EMAIL)

        self.test_project = Project.objects.create(
            title=TEST_PROJECT_NAME, owner=owner_user.get_profile())

        # Genome created directly through the ORM, with a single chromosome.
        self.test_ref_genome = ReferenceGenome.objects.create(
            project=self.test_project, label='boom')
        self.test_chromosome = Chromosome.objects.create(
            reference_genome=self.test_ref_genome,
            label='Chromosome',
            num_bases=9001)

        # Genome created through the file import path.
        self.test_ext_ref_genome = import_reference_genome_from_local_file(
            self.test_project, TEST_REF_GENOME_NAME, TEST_REF_GENOME_PATH,
            'genbank')
Beispiel #53
0
 def setUp(self):
     """Create the common entities and import the fasta test reference."""
     entities = create_common_entities()
     self.common_data = entities
     self.project = entities['project']

     # Import TEST_FASTA into the project under the label 'ref_genome'.
     self.reference_genome = import_reference_genome_from_local_file(
         self.project, 'ref_genome', TEST_FASTA, 'fasta')
Beispiel #54
0
    def test_run_lumpy(self):
        """Runs lumpy on the test BAM and checks the parsed SV variants.

        Builds a fasta reference genome, a sample, an alignment group and a
        BWA_ALIGN dataset pointing at TEST_DISC_SPLIT_BAM, runs lumpy, adds
        the output vcf to the alignment group as a VCF_LUMPY dataset and
        parses it. Asserts that exactly two Variants exist for the reference
        genome and that at least one lies within 50 bases of position 25000.
        """
        TEST_SAMPLE_UID = '8c57e7b9'

        # Create a ref genome.
        self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_FASTA, 'fasta')

        # Create a sample.
        self.experiment_sample = ExperimentSample.objects.create(
            uid=TEST_SAMPLE_UID, project=self.project, label='sample1')

        # Create a new alignment group.
        alignment_group = AlignmentGroup.objects.create(
            label='test alignment', reference_genome=self.reference_genome)

        self.alignment_group = alignment_group

        # Create the expected models.
        sample_alignment = ExperimentSampleToAlignment.objects.create(
            alignment_group=alignment_group,
            experiment_sample=self.experiment_sample)
        bwa_dataset = Dataset.objects.create(label=Dataset.TYPE.BWA_ALIGN,
                                             type=Dataset.TYPE.BWA_ALIGN,
                                             status=Dataset.STATUS.READY)
        bwa_dataset.filesystem_location = clean_filesystem_location(
            TEST_DISC_SPLIT_BAM)
        bwa_dataset.save()

        sample_alignment.dataset_set.add(bwa_dataset)
        sample_alignment.save()

        self.bwa_dataset = bwa_dataset
        self.sample_alignment = sample_alignment

        fasta_ref = get_dataset_with_type(
            self.reference_genome,
            Dataset.TYPE.REFERENCE_GENOME_FASTA).get_absolute_location()

        sample_alignments = [self.sample_alignment]

        vcf_output_dir = self.alignment_group.get_model_data_dir()

        vcf_output_filename = os.path.join(vcf_output_dir, 'lumpy.vcf')

        alignment_type = 'BWA_ALIGN'

        # NOTE: Running these functions but not checking results.
        get_discordant_read_pairs(self.sample_alignment)
        get_split_reads(self.sample_alignment)

        run_lumpy(fasta_ref, sample_alignments, vcf_output_dir,
                  vcf_output_filename, alignment_type)

        # Add the lumpy output to the alignment group as a dataset before
        # parsing it.
        dataset = Dataset.objects.create(
            type=Dataset.TYPE.VCF_LUMPY,
            label=Dataset.TYPE.VCF_LUMPY,
            filesystem_location=vcf_output_filename,
        )

        self.alignment_group.dataset_set.add(dataset)

        # Parse the resulting vcf, grab variant objects
        parse_alignment_group_vcf(self.alignment_group, Dataset.TYPE.VCF_LUMPY)

        # Grab the resulting variants.
        variants = Variant.objects.filter(
            reference_genome=self.reference_genome)

        # There should be a Variant object for each sv event.
        self.assertEqual(2, len(variants))

        # One event should be located very close to 25k. Compare the
        # absolute distance: a signed offset (25000 - position) is also
        # below 50 for every position far beyond 25k, which would let the
        # check pass for variants nowhere near the expected location.
        self.assertTrue(
            any(abs(25000 - v.position) < 50 for v in variants))