def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam=read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname=dbparam['dbname']
    Base.metadata.drop_all(self.engine)
    if os.path.exists(self.dbname):
      os.remove(self.dbname)
    Base.metadata.create_all(self.engine)
    self.session_class=base.get_session_class()

    base = BaseAdaptor(**{'session_class':self.session_class})
    base.start_session()
    platform_data=[{ "platform_igf_id" : "M001",
                     "model_name" : "MISEQ" ,
                     "vendor_name" : "ILLUMINA" ,
                     "software_name" : "RTA",
                     "software_version" : "RTA1.18.54"}]                        # platform data
    flowcell_rule_data=[{"platform_igf_id":"M001",
                         "flowcell_type":"MISEQ",
                         "index_1":"NO_CHANGE",
                         "index_2":"NO_CHANGE"}]                                # flowcell rule data
    pl=PlatformAdaptor(**{'session':base.session})
    pl.store_platform_data(data=platform_data)                                  # loading platform data
    pl.store_flowcell_barcode_rule(data=flowcell_rule_data)                     # loading flowcell rules data
    project_data=[{'project_igf_id':'ProjectA'}]                                # project data
    pa=ProjectAdaptor(**{'session':base.session})
    pa.store_project_and_attribute_data(data=project_data)                      # load project data
    sample_data=[{'sample_igf_id':'SampleA',
                  'project_igf_id':'ProjectA'}]                                 # sample data
    sa=SampleAdaptor(**{'session':base.session})
    sa.store_sample_and_attribute_data(data=sample_data)                        # store sample data
    seqrun_data=[{'seqrun_igf_id':'SeqrunA', 
                  'flowcell_id':'000000000-D0YLK', 
                  'platform_igf_id':'M001',
                  'flowcell':'MISEQ'}]                                          # seqrun data
    sra=SeqrunAdaptor(**{'session':base.session})
    sra.store_seqrun_and_attribute_data(data=seqrun_data)                       # load seqrun data
    experiment_data=[{'experiment_igf_id':'ExperimentA',
                      'sample_igf_id':'SampleA',
                      'library_name':'SampleA',
                      'platform_name':'MISEQ',
                      'project_igf_id':'ProjectA'}]                             # experiment data
    ea=ExperimentAdaptor(**{'session':base.session})
    ea.store_project_and_attribute_data(data=experiment_data)                   # load experiment data
    base.commit_session()
    base.close_session()
 def test_fetch_project_and_sample_for_experiment(self):
   ea = ExperimentAdaptor(**{'session_class':self.session_class})
   ea.start_session()
   project_id,sample_id=ea.fetch_project_and_sample_for_experiment(experiment_igf_id='ExperimentA')
   self.assertEqual(project_id,'ProjectA')
   self.assertEqual(sample_id,'SampleA')
   ea.close_session()
Example #3
0
 def setUp(self):
   self.dbconfig = 'data/dbconfig.json'
   dbparam=read_dbconf_json(self.dbconfig)
   base = BaseAdaptor(**dbparam)
   self.engine = base.engine
   self.dbname=dbparam['dbname']
   Base.metadata.drop_all(self.engine)
   if os.path.exists(self.dbname):
     os.remove(self.dbname)
   Base.metadata.create_all(self.engine)
   self.session_class=base.get_session_class()
   base.start_session()
   project_data=[{'project_igf_id':'ProjectA'}]
   pa=ProjectAdaptor(**{'session':base.session})
   pa.store_project_and_attribute_data(data=project_data)                      # load project data
   sample_data=[{'sample_igf_id':'SampleA',
                 'project_igf_id':'ProjectA'}]                                 # sample data
   sa=SampleAdaptor(**{'session':base.session})
   sa.store_sample_and_attribute_data(data=sample_data)                        # store sample data
   experiment_data=[{'experiment_igf_id':'ExperimentA',
                     'sample_igf_id':'SampleA',
                     'library_name':'SampleA',
                     'platform_name':'MISEQ',
                     'project_igf_id':'ProjectA'}]                             # experiment data
   ea=ExperimentAdaptor(**{'session':base.session})
   ea.store_project_and_attribute_data(data=experiment_data)
   self.temp_dir=get_temp_dir()
   temp_files=['a.csv','b.csv']
   for temp_file in temp_files:
     with open(os.path.join(self.temp_dir,temp_file),'w') as fp:
       fp.write('A')
   collection_data=[{'name':'ExperimentA',
                     'type':'AnalysisA_html',
                     'table':'experiment',
                     'file_path':os.path.join(self.temp_dir,temp_file)}
                     for temp_file in temp_files]
   ca=CollectionAdaptor(**{'session':base.session})
   ca.load_file_and_create_collection(data=collection_data,
                                      calculate_file_size_and_md5=False)
   base.close_session()
Example #4
0
 def run(self):
     try:
         project_igf_id = self.param_required('project_igf_id')
         experiment_igf_id = self.param_required('experiment_igf_id')
         sample_igf_id = self.param_required('sample_igf_id')
         igf_session_class = self.param_required('igf_session_class')
         ea = ExperimentAdaptor(**{'session_class': igf_session_class})
         ea.start_session()
         runs = ea.fetch_runs_for_igf_id(
             experiment_igf_id=experiment_igf_id,
             include_active_runs=True,
             output_mode='dataframe')  # fetch active runs for an experiment
         ea.close_session()
         runs = runs.to_dict(orient='records')  # convert run ids to a list
         self.param('sub_tasks', runs)  # pass on run factory output list
     except Exception as e:
         message='project: {2}, sample:{3}, Error in {0}: {1}'.format(self.__class__.__name__, \
                                                         e, \
                                                         project_igf_id,
                                                         sample_igf_id)
         self.warning(message)
         self.post_message_to_slack(
             message, reaction='fail')  # post msg to slack for failed jobs
         raise
 def test_check_experiment_records_id(self):
   ea = ExperimentAdaptor(**{'session_class':self.session_class})
   ea.start_session()
   self.assertTrue(ea.check_experiment_records_id(experiment_igf_id='ExperimentA'))
   self.assertFalse(ea.check_experiment_records_id(experiment_igf_id='ExperimentB'))
   ea.close_session()
    def load_file_to_disk_and_db(self,
                                 input_file_list,
                                 withdraw_exisitng_collection=True,
                                 autosave_db=True,
                                 file_suffix=None,
                                 force=True,
                                 remove_file=False):
        '''
    A method for loading analysis results to disk and database. File will be moved to a new path if base_path is present.
    Directory structure of the final path is based on the collection_table information.
    
    Following will be the final directory structure if base_path is present
    
    project - base_path/project_igf_id/analysis_name
    sample - base_path/project_igf_id/sample_igf_id/analysis_name
    experiment - base_path/project_igf_id/sample_igf_id/experiment_igf_id/analysis_name
    run - base_path/project_igf_id/sample_igf_id/experiment_igf_id/run_igf_id/analysis_name
    
    :param input_file_list: A list of input file to load, all using the same collection info
    :param withdraw_exisitng_collection: Remove existing collection group, DO NOT use this while loading a list of files
    :param autosave_db: Save changes to database, default True
    :param file_suffix: Use a specific file suffix, use None if it should be same as original file
                        e.g. input.vcf.gz to  output.vcf.gz
    :param force: Toggle for removing existing file, default True
    :param remove_file: A toggle for removing existing file from disk, default False
    :returns: A list of final filepath
    '''
        try:
            project_igf_id = None
            sample_igf_id = None
            experiment_igf_id = None
            experiment_igf_id = None
            run_igf_id = None
            output_path_list = list()  # define empty output list
            dbconnected = False
            if self.collection_name is None or \
               self.collection_type is None or \
               self.collection_table is None:
                raise ValueError('File collection information is incomplete'
                                 )  # check for collection information

            base = BaseAdaptor(**{'session_class': self.dbsession_class})
            base.start_session()  # connect to db
            dbconnected = True
            if self.base_path is not None:
                if self.collection_table == 'sample':
                    sa = SampleAdaptor(**{'session': base.session})
                    sample_igf_id = self.collection_name
                    sample_exists = sa.check_sample_records_igf_id(
                        sample_igf_id=sample_igf_id)
                    if not sample_exists:
                        raise ValueError('Sample {0} not found in db'.\
                                         format(sample_igf_id))

                    project_igf_id = \
                      sa.fetch_sample_project(sample_igf_id=sample_igf_id)                # fetch project id for sample
                elif self.collection_table == 'experiment':
                    ea = ExperimentAdaptor(**{'session': base.session})
                    experiment_igf_id = self.collection_name
                    experiment_exists = \
                      ea.check_experiment_records_id(
                        experiment_igf_id=experiment_igf_id)
                    if not experiment_exists:
                        raise ValueError('Experiment {0} not present in database'.\
                                         format(experiment_igf_id))

                    (project_igf_id,sample_igf_id) = \
                        ea.fetch_project_and_sample_for_experiment(
                          experiment_igf_id=experiment_igf_id)                            # fetch project and sample id for experiment
                elif self.collection_table == 'run':
                    ra = RunAdaptor(**{'session': base.session})
                    run_igf_id = self.collection_name
                    run_exists = ra.check_run_records_igf_id(
                        run_igf_id=run_igf_id)
                    if not run_exists:
                        raise ValueError('Run {0} not found in database'.\
                                         format(run_igf_id))

                    (project_igf_id,sample_igf_id,experiment_igf_id) = \
                      ra.fetch_project_sample_and_experiment_for_run(
                        run_igf_id=run_igf_id)                                            # fetch project, sample and experiment id for run
                elif self.collection_table == 'project':
                    pa = ProjectAdaptor(**{'session': base.session})
                    project_igf_id = self.collection_name
                    project_exists = \
                      pa.check_project_records_igf_id(
                        project_igf_id=project_igf_id)
                    if not project_exists:
                        raise ValueError('Project {0} not found in database'.\
                                         format(project_igf_id))

            if self.rename_file and self.analysis_name is None:
                raise ValueError('Analysis name is required for renaming file'
                                 )  # check analysis name

            for input_file in input_file_list:
                final_path = ''
                if self.base_path is None:  # do not move file if base_path is absent
                    final_path = os.path.dirname(input_file)
                else:  # move file path
                    if self.collection_table == 'project':
                        if project_igf_id is None:
                            raise ValueError('Missing project id for collection {0}'.\
                                             format(self.collection_name))

                        final_path = \
                          os.path.join(
                            self.base_path,
                            project_igf_id,
                            self.analysis_name)                                             # final path for project
                    elif self.collection_table == 'sample':
                        if project_igf_id is None or \
                           sample_igf_id is None:
                            raise ValueError('Missing project and sample id for collection {0}'.\
                                             format(self.collection_name))

                        final_path = \
                          os.path.join(
                            self.base_path,
                            project_igf_id,
                            sample_igf_id,
                            self.analysis_name)                                             # final path for sample
                    elif self.collection_table == 'experiment':
                        if project_igf_id is None or \
                           sample_igf_id is None or \
                           experiment_igf_id is None:
                            raise ValueError('Missing project,sample and experiment id for collection {0}'.\
                                             format(self.collection_name))

                        final_path = \
                          os.path.join(
                            self.base_path,
                            project_igf_id,
                            sample_igf_id,
                            experiment_igf_id,
                            self.analysis_name)                                             # final path for experiment
                    elif self.collection_table == 'run':
                        if project_igf_id is None or \
                           sample_igf_id is None or \
                           experiment_igf_id is None or \
                           run_igf_id is None:
                            raise ValueError('Missing project,sample,experiment and run id for collection {0}'.\
                                             format(self.collection_name))

                        final_path = \
                          os.path.join(\
                            self.base_path,
                            project_igf_id,
                            sample_igf_id,
                            experiment_igf_id,
                            run_igf_id,
                            self.analysis_name)                                             # final path for run

                if self.rename_file:
                    new_filename = \
                      self.get_new_file_name(
                        input_file=input_file,
                        file_suffix=file_suffix)
                    final_path = \
                      os.path.join(
                        final_path,
                        new_filename)                                                     # get new filepath
                else:
                    final_path = \
                      os.path.join(
                        final_path,
                        os.path.basename(input_file))

                if final_path != input_file:  # move file if its required
                    final_path = preprocess_path_name(
                        input_path=final_path
                    )  # remove unexpected characters from file path
                    move_file(source_path=input_file,
                              destinationa_path=final_path,
                              force=force
                              )  # move or overwrite file to destination dir

                output_path_list.append(
                    final_path)  # add final path to the output list
                self.create_or_update_analysis_collection(
                    file_path=final_path,
                    dbsession=base.session,
                    withdraw_exisitng_collection=withdraw_exisitng_collection,
                    remove_file=remove_file,
                    autosave_db=autosave_db)  # load new file collection in db
                if autosave_db:
                    base.commit_session()  # save changes to db for each file

            base.commit_session()  # save changes to db
            base.close_session()  # close db connection
            return output_path_list
        except:
            if dbconnected:
                base.rollback_session()
                base.close_session()
            raise
 def setUp(self):
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     base.start_session()
     platform_data = [{
         "platform_igf_id": "M03291",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"
     }, {
         "platform_igf_id": "NB501820",
         "model_name": "NEXTSEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"
     }, {
         "platform_igf_id": "K00345",
         "model_name": "HISEQ4000",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"
     }]
     flowcell_rule_data = [{
         "platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 SR",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"
     }, {
         "platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 PE",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"
     }, {
         "platform_igf_id": "NB501820",
         "flowcell_type": "NEXTSEQ",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"
     }, {
         "platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"
     }]
     pl = PlatformAdaptor(**{'session': base.session})
     pl.store_platform_data(data=platform_data)
     pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
     seqrun_data = [{
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'flowcell_id': '000000000-BRN47',
         'platform_igf_id': 'M03291',
         'flowcell': 'MISEQ',
     }, {
         'seqrun_igf_id': '180416_NB03291_013_000000001-BRN47',
         'flowcell_id': '000000001-BRN47',
         'platform_igf_id': 'NB501820',
         'flowcell': 'NEXTSEQ',
     }]
     sra = SeqrunAdaptor(**{'session': base.session})
     sra.store_seqrun_and_attribute_data(data=seqrun_data)
     project_data = [{'project_igf_id': 'projectA'}]
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     sample_data = [
         {
             'sample_igf_id': 'sampleA',
             'project_igf_id': 'projectA',
             'species_name': 'HG38'
         },
         {
             'sample_igf_id': 'sampleB',
             'project_igf_id': 'projectA',
             'species_name': 'UNKNOWN'
         },
     ]
     sa = SampleAdaptor(**{'session': base.session})
     sa.store_sample_and_attribute_data(data=sample_data)
     experiment_data = [
         {
             'project_igf_id': 'projectA',
             'sample_igf_id': 'sampleA',
             'experiment_igf_id': 'sampleA_MISEQ',
             'library_name': 'sampleA',
             'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
             'library_strategy': 'RNA-SEQ',
             'experiment_type': 'TENX-TRANSCRIPTOME-3P',
             'library_layout': 'PAIRED',
             'platform_name': 'MISEQ',
         },
         {
             'project_igf_id': 'projectA',
             'sample_igf_id': 'sampleA',
             'experiment_igf_id': 'sampleA_NEXTSEQ',
             'library_name': 'sampleA',
             'library_source': 'UNKNOWN',
             'library_strategy': 'RNA-SEQ',
             'experiment_type': 'TENX-TRANSCRIPTOME-3P',
             'library_layout': 'PAIRED',
             'platform_name': 'NEXTSEQ',
         },
         {
             'project_igf_id': 'projectA',
             'sample_igf_id': 'sampleB',
             'experiment_igf_id': 'sampleB_MISEQ',
             'library_name': 'sampleB',
             'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
             'library_strategy': 'RNA-SEQ',
             'experiment_type': 'TENX-TRANSCRIPTOME-3P',
             'library_layout': 'PAIRED',
             'platform_name': 'MISEQ',
         },
     ]
     ea = ExperimentAdaptor(**{'session': base.session})
     ea.store_project_and_attribute_data(data=experiment_data)
     run_data = [{
         'experiment_igf_id': 'sampleA_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'sampleA_MISEQ_000000000-BRN47_1',
         'lane_number': '1'
     }, {
         'experiment_igf_id': 'sampleA_NEXTSEQ',
         'seqrun_igf_id': '180416_NB03291_013_000000001-BRN47',
         'run_igf_id': 'sampleA_NEXTSEQ_000000001-BRN47_2',
         'lane_number': '2'
     }, {
         'experiment_igf_id': 'sampleB_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'sampleB_MISEQ_HVWN7BBXX_1',
         'lane_number': '1'
     }]
     ra = RunAdaptor(**{'session': base.session})
     ra.store_run_and_attribute_data(data=run_data)
     file_data = [
         {
             'file_path':
             '/path/sampleA_MISEQ_000000000-BRN47_1_R1.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1528121404',
         },
         {
             'file_path':
             '/path/sampleA_NEXTSEQ_000000001-BRN47_2_R1.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1528121404',
         },
         {
             'file_path': '/path/sampleB_MISEQ_HVWN7BBXX_1_R1.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1528121404',
         },
     ]
     fa = FileAdaptor(**{'session': base.session})
     fa.store_file_and_attribute_data(data=file_data)
     collection_data = [{
         'name': 'sampleA_MISEQ_000000000-BRN47_1',
         'type': 'demultiplexed_fastq',
         'table': 'run'
     }, {
         'name': 'sampleA_NEXTSEQ_000000001-BRN47_2',
         'type': 'demultiplexed_fastq',
         'table': 'run'
     }, {
         'name': 'sampleB_MISEQ_HVWN7BBXX_1',
         'type': 'demultiplexed_fastq',
         'table': 'run'
     }]
     collection_files_data = [{
         'name':
         'sampleA_MISEQ_000000000-BRN47_1',
         'type':
         'demultiplexed_fastq',
         'file_path':
         '/path/sampleA_MISEQ_000000000-BRN47_1_R1.fastq.gz'
     }, {
         'name':
         'sampleA_NEXTSEQ_000000001-BRN47_2',
         'type':
         'demultiplexed_fastq',
         'file_path':
         '/path/sampleA_NEXTSEQ_000000001-BRN47_2_R1.fastq.gz'
     }, {
         'name':
         'sampleB_MISEQ_HVWN7BBXX_1',
         'type':
         'demultiplexed_fastq',
         'file_path':
         '/path/sampleB_MISEQ_HVWN7BBXX_1_R1.fastq.gz'
     }]
     ca = CollectionAdaptor(**{'session': base.session})
     ca.store_collection_and_attribute_data(data=collection_data)
     ca.create_collection_group(data=collection_files_data)
     base.close_session()
 def test_update_metadta_from_sample_attribute1(self):
     ea = ExperimentAdaptor(**{'session_class': self.session_class})
     ea.start_session()
     exp1 = ea.fetch_experiment_records_id(
         experiment_igf_id='IGF00001_HISEQ4000')
     self.assertEqual(exp1.library_strategy, 'UNKNOWN')
     exp2 = ea.fetch_experiment_records_id(
         experiment_igf_id='IGF00002_HISEQ4000')
     self.assertEqual(exp2.library_strategy, 'UNKNOWN')
     exp3 = ea.fetch_experiment_records_id(
         experiment_igf_id='IGF00003_HISEQ4000')
     self.assertEqual(exp3.library_source, 'UNKNOWN')
     ea.close_session()
     emu = Experiment_metadata_updator(dbconfig_file=self.dbconfig,
                                       log_slack=False)
     emu.update_metadta_from_sample_attribute()
     ea = ExperimentAdaptor(**{'session_class': self.session_class})
     ea.start_session()
     exp1 = ea.fetch_experiment_records_id(
         experiment_igf_id='IGF00001_HISEQ4000')
     self.assertEqual(exp1.library_strategy, 'RNA-SEQ')
     exp2 = ea.fetch_experiment_records_id(
         experiment_igf_id='IGF00002_HISEQ4000')
     self.assertEqual(exp2.library_strategy, 'UNKNOWN')
     exp3 = ea.fetch_experiment_records_id(
         experiment_igf_id='IGF00003_HISEQ4000')
     self.assertEqual(exp3.library_source, 'TRANSCRIPTOMIC_SINGLE_CELL')
     ea.close_session()
    def setUp(self):
        self.dbconfig = 'data/dbconfig.json'
        dbparam = read_dbconf_json(self.dbconfig)
        base = BaseAdaptor(**dbparam)
        self.engine = base.engine
        self.dbname = dbparam['dbname']
        Base.metadata.drop_all(self.engine)
        if os.path.exists(self.dbname):
            os.remove(self.dbname)
        Base.metadata.create_all(self.engine)
        self.session_class = base.get_session_class()
        self.temp_work_dir = get_temp_dir()
        self.temp_base_dir = get_temp_dir()
        self.input_list = ['a.cram', 'a.vcf.gz', 'b.tar.gz']
        for file_name in self.input_list:
            file_path = os.path.join(self.temp_work_dir, file_name)
            with open(file_path, 'w') as fq:
                fq.write('AAAA')  # create input files

        base = BaseAdaptor(**{'session_class': self.session_class})
        base.start_session()
        platform_data = [{
            "platform_igf_id": "M001",
            "model_name": "MISEQ",
            "vendor_name": "ILLUMINA",
            "software_name": "RTA",
            "software_version": "RTA1.18.54"
        }]  # platform data
        flowcell_rule_data = [{
            "platform_igf_id": "M001",
            "flowcell_type": "MISEQ",
            "index_1": "NO_CHANGE",
            "index_2": "NO_CHANGE"
        }]  # flowcell rule data
        pl = PlatformAdaptor(**{'session': base.session})
        pl.store_platform_data(data=platform_data)  # loading platform data
        pl.store_flowcell_barcode_rule(
            data=flowcell_rule_data)  # loading flowcell rules data
        project_data = [{'project_igf_id': 'ProjectA'}]  # project data
        pa = ProjectAdaptor(**{'session': base.session})
        pa.store_project_and_attribute_data(
            data=project_data)  # load project data
        sample_data = [{
            'sample_igf_id': 'SampleA',
            'project_igf_id': 'ProjectA'
        }]  # sample data
        sa = SampleAdaptor(**{'session': base.session})
        sa.store_sample_and_attribute_data(
            data=sample_data)  # store sample data
        seqrun_data = [{
            'seqrun_igf_id': 'SeqrunA',
            'flowcell_id': '000000000-D0YLK',
            'platform_igf_id': 'M001',
            'flowcell': 'MISEQ'
        }]  # seqrun data
        sra = SeqrunAdaptor(**{'session': base.session})
        sra.store_seqrun_and_attribute_data(
            data=seqrun_data)  # load seqrun data
        experiment_data = [{
            'experiment_igf_id': 'ExperimentA',
            'sample_igf_id': 'SampleA',
            'library_name': 'SampleA',
            'platform_name': 'MISEQ',
            'project_igf_id': 'ProjectA'
        }]  # experiment data
        ea = ExperimentAdaptor(**{'session': base.session})
        ea.store_project_and_attribute_data(
            data=experiment_data)  # load experiment data
        run_data = [{
            'run_igf_id': 'RunA',
            'experiment_igf_id': 'ExperimentA',
            'seqrun_igf_id': 'SeqrunA',
            'lane_number': '1'
        }]  # run data
        ra = RunAdaptor(**{'session': base.session})
        ra.store_run_and_attribute_data(data=run_data)  # load run data
        base.commit_session()
        base.close_session()
Example #10
0
    def setUp(self):
        self.dbconfig = 'data/dbconfig.json'
        dbparam = read_dbconf_json(self.dbconfig)
        base = BaseAdaptor(**dbparam)
        self.engine = base.engine
        self.dbname = dbparam['dbname']
        Base.metadata.create_all(self.engine)
        self.session_class = base.get_session_class()
        # load platform data
        platform_data=\
          [{"platform_igf_id" : "M03291" ,
            "model_name" : "MISEQ" ,
            "vendor_name" : "ILLUMINA" ,
            "software_name" : "RTA" ,
            "software_version" : "RTA1.18.54"
           },
           {"platform_igf_id" : "NB501820",
            "model_name" : "NEXTSEQ",
            "vendor_name" : "ILLUMINA",
            "software_name" : "RTA",
            "software_version" : "RTA2"
           },
           {"platform_igf_id" : "K00345",
            "model_name" : "HISEQ4000",
            "vendor_name" : "ILLUMINA",
            "software_name" : "RTA",
            "software_version" : "RTA2"
           }]

        flowcell_rule_data=\
          [{"platform_igf_id":"K00345",
            "flowcell_type":"HiSeq 3000/4000 SR",
            "index_1":"NO_CHANGE",
            "index_2":"NO_CHANGE"},
           {"platform_igf_id":"K00345",
            "flowcell_type":"HiSeq 3000/4000 PE",
            "index_1":"NO_CHANGE",
            "index_2":"REVCOMP"},
           {"platform_igf_id":"NB501820",
            "flowcell_type":"NEXTSEQ",
            "index_1":"NO_CHANGE",
            "index_2":"REVCOMP"},
           {"platform_igf_id":"M03291",
            "flowcell_type":"MISEQ",
            "index_1":"NO_CHANGE",
            "index_2":"NO_CHANGE"}]

        pl = PlatformAdaptor(**{'session_class': base.session_class})
        pl.start_session()
        pl.store_platform_data(data=platform_data)
        pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
        pl.close_session()

        # load project data

        project_data = [{'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA'}]
        pa = ProjectAdaptor(**{'session_class': base.session_class})
        pa.start_session()
        pa.store_project_and_attribute_data(data=project_data)
        pa.close_session()

        # load samples

        sample_data = [
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109792',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109793',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109794',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109795',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109796',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109797',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109797_1',
                'expected_read': 40000000
            },
        ]

        sa = SampleAdaptor(**{'session_class': base.session_class})
        sa.start_session()
        sa.store_sample_and_attribute_data(data=sample_data)
        sa.close_session()

        # load seqrun data

        seqrun_data = [{
            'flowcell_id': 'HV2GJBBXX',
            'platform_igf_id': 'K00345',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX'
        }]

        sra = SeqrunAdaptor(**{'session_class': base.session_class})
        sra.start_session()
        sra.store_seqrun_and_attribute_data(data=seqrun_data)
        sra.close_session()

        # load experiment data

        experiment_data=\
          [{'experiment_igf_id': 'IGF109792_HISEQ4000',
            'library_name': 'IGF109792',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109792',
           },
           {'experiment_igf_id': 'IGF109793_HISEQ4000',
            'library_name': 'IGF109793',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109793',
           },
           {'experiment_igf_id': 'IGF109794_HISEQ4000',
            'library_name': 'IGF109794',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109794',
           },
           {'experiment_igf_id': 'IGF109795_HISEQ4000',
            'library_name': 'IGF109795',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109795',
           },
           {'experiment_igf_id': 'IGF109796_HISEQ4000',
            'library_name': 'IGF109796',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109796',
           },
           {'experiment_igf_id': 'IGF109797_HISEQ4000',
            'library_name': 'IGF109797',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109797',
           },
          ]

        ea = ExperimentAdaptor(**{'session_class': base.session_class})
        ea.start_session()
        ea.store_project_and_attribute_data(data=experiment_data)
        ea.close_session()

        # load run data

        run_data=\
          [{'experiment_igf_id': 'IGF109792_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109792_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':288046541
           },
           {'experiment_igf_id': 'IGF109793_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109793_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':14666330
           },
           {'experiment_igf_id': 'IGF109794_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109794_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':5009143
           },
           {'experiment_igf_id': 'IGF109795_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109795_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':1391747
           },
           {'experiment_igf_id': 'IGF109796_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': '	IGF109796_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':1318008
           },
           {'experiment_igf_id': 'IGF109797_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109797_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':1216324
           },
          ]

        ra = RunAdaptor(**{'session_class': base.session_class})
        ra.start_session()
        ra.store_run_and_attribute_data(data=run_data)
        ra.close_session()
        'experiment_igf_id': 'ExperimentA',
        'seqrun_igf_id': '180410_K00345_0063_AHWL7CBBXX',
        'lane_number': '1'
    }]  # run data
    base.start_session()
    pl = PlatformAdaptor(**{'session': base.session})
    pl.store_platform_data(data=platform_data)  # loading platform data
    pl.store_flowcell_barcode_rule(
        data=flowcell_rule_data)  # loading flowcell rules data
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)  # load project data
    sa = SampleAdaptor(**{'session': base.session})
    sa.store_sample_and_attribute_data(data=sample_data)  # store sample data
    sra = SeqrunAdaptor(**{'session': base.session})
    sra.store_seqrun_and_attribute_data(data=seqrun_data)  # load seqrun data
    ea = ExperimentAdaptor(**{'session': base.session})
    ea.store_project_and_attribute_data(
        data=experiment_data)  # load experiment data
    ra = RunAdaptor(**{'session': base.session})
    ra.store_run_and_attribute_data(data=run_data)  # load run data
    pipeline_data = [{
        "pipeline_name": "DemultiplexIlluminaFastq",
        "pipeline_db": "sqlite:////bcl2fastq.db",
    }]

    pipeline_seed_data = [
        {
            'pipeline_name': 'DemultiplexIlluminaFastq',
            'seed_id': 1,
            'seed_table': 'seqrun'
        },
    def _build_and_store_exp_run_and_collection_in_db(self,fastq_files_list, \
                                                      restricted_list=('10X')):
        '''
    An internal method for building db collections for the raw fastq files
    '''
        session_class = self.session_class
        db_connected = False
        try:
            restricted_list = list(restricted_list)
            dataframe = pd.DataFrame(fastq_files_list)
            # calculate additional detail
            dataframe=dataframe.apply(lambda data: \
                                      self._calculate_experiment_run_and_file_info(data,
                                                                     restricted_list),\
                                      axis=1)
            # get file data
            file_group_columns = [
                'name', 'type', 'location', 'R1', 'R1_md5', 'R1_size', 'R2',
                'R2_md5', 'R2_size'
            ]
            file_group_data = dataframe.loc[:, file_group_columns]
            file_group_data = file_group_data.drop_duplicates()
            (file_data, file_group_data) = self._reformat_file_group_data(
                data=file_group_data)
            # get base session
            base = BaseAdaptor(**{'session_class': session_class})
            base.start_session()
            db_connected = True
            # get experiment data
            experiment_columns=base.get_table_columns(table_name=Experiment, \
                                                      excluded_columns=['experiment_id',
                                                                        'project_id',
                                                                        'sample_id' ])
            experiment_columns.extend(['project_igf_id', 'sample_igf_id'])
            exp_data = dataframe.loc[:, experiment_columns]
            exp_data = exp_data.drop_duplicates()
            if exp_data.index.size > 0:
                exp_data=exp_data.apply(lambda x: \
                                        self._check_existing_data(\
                                              data=x,\
                                              dbsession=base.session,\
                                              table_name='experiment',\
                                              check_column='EXISTS'),\
                                        axis=1)
                exp_data = exp_data[exp_data['EXISTS'] ==
                                    False]  # filter existing experiments
                exp_data.drop('EXISTS', axis=1,
                              inplace=True)  # remove extra columns
                exp_data = exp_data[pd.isnull(exp_data['experiment_igf_id']) ==
                                    False]  # filter exp with null values
            # get run data
            run_columns=base.get_table_columns(table_name=Run, \
                                               excluded_columns=['run_id',
                                                                 'seqrun_id',
                                                                 'experiment_id',
                                                                 'date_created',
                                                                 'status'
                                                                ])
            run_columns.extend([
                'seqrun_igf_id', 'experiment_igf_id', 'R1_READ_COUNT',
                'R2_READ_COUNT'
            ])
            run_data = dataframe.loc[:, run_columns]
            run_data = run_data.drop_duplicates()
            if run_data.index.size > 0:
                run_data=run_data.apply(lambda x: \
                                        self._check_existing_data(\
                                              data=x,\
                                              dbsession=base.session,\
                                              table_name='run',\
                                              check_column='EXISTS'),\
                                        axis=1)
                run_data = run_data[run_data['EXISTS'] ==
                                    False]  # filter existing runs
                run_data.drop('EXISTS', axis=1,
                              inplace=True)  # remove extra columns
                run_data = run_data[pd.isnull(run_data['run_igf_id']) ==
                                    False]  # filter run with null values
            # get collection data
            collection_columns = ['name', 'type', 'table']
            collection_data = dataframe.loc[:, collection_columns]
            collection_data = collection_data.drop_duplicates()
            if collection_data.index.size > 0:
                collection_data=collection_data.apply(lambda x: \
                                        self._check_existing_data( \
                                              data=x, \
                                              dbsession=base.session, \
                                              table_name='collection', \
                                              check_column='EXISTS'), \
                                        axis=1)
                collection_data = collection_data[collection_data[
                    'EXISTS'] == False]  # filter existing collection
                collection_data.drop('EXISTS', axis=1,
                                     inplace=True)  # remove extra columns
                collection_data = collection_data[pd.isnull(
                    collection_data['name']
                ) == False]  # filter collection with null values
            # store experiment to db
            if exp_data.index.size > 0:
                ea = ExperimentAdaptor(**{'session': base.session})
                ea.store_project_and_attribute_data(data=exp_data,
                                                    autosave=False)
                base.session.flush()
            # store run to db
            if run_data.index.size > 0:
                ra = RunAdaptor(**{'session': base.session})
                ra.store_run_and_attribute_data(data=run_data, autosave=False)
                base.session.flush()
            # store file to db

            fa = FileAdaptor(**{'session': base.session})
            fa.store_file_and_attribute_data(data=file_data, autosave=False)
            base.session.flush()
            # store collection to db
            ca = CollectionAdaptor(**{'session': base.session})
            if collection_data.index.size > 0:
                ca.store_collection_and_attribute_data(data=collection_data,\
                                                       autosave=False)
                base.session.flush()
            ca.create_collection_group(data=file_group_data, autosave=False)
            base.commit_session()
            self._write_manifest_file(file_data)
        except:
            if db_connected:
                base.rollback_session()
            raise
        finally:
            if db_connected:
                base.close_session()
    def _check_existing_data(data,
                             dbsession,
                             table_name,
                             check_column='EXISTS'):
        try:
            if not isinstance(data, pd.Series):
                raise ValueError('Expecting a data series and got {0}'.format(
                    type(data)))

            if table_name == 'experiment':
                if 'experiment_igf_id' in data and \
                   not pd.isnull(data['experiment_igf_id']):
                    experiment_igf_id = data['experiment_igf_id']
                    ea = ExperimentAdaptor(**{'session': dbsession})
                    experiment_exists = ea.check_experiment_records_id(
                        experiment_igf_id)
                    if experiment_exists:  # store data only if experiment is not existing
                        data[check_column] = True
                    else:
                        data[check_column] = False
                    return data
                else:
                    raise ValueError(
                        'Missing or empty required column experiment_igf_id')
            elif table_name == 'run':
                if 'run_igf_id' in data and \
                   not pd.isnull(data['run_igf_id']):
                    run_igf_id = data['run_igf_id']
                    ra = RunAdaptor(**{'session': dbsession})
                    run_exists = ra.check_run_records_igf_id(run_igf_id)
                    if run_exists:  # store data only if run is not existing
                        data[check_column] = True
                    else:
                        data[check_column] = False
                    return data
                else:
                    raise ValueError(
                        'Missing or empty required column run_igf_id')

            elif table_name == 'collection':
                if 'name' in data and 'type' in data and \
                   not pd.isnull(data['name']) and \
                   not pd.isnull(data['type']):
                    ca = CollectionAdaptor(**{'session': dbsession})
                    collection_exists=ca.check_collection_records_name_and_type(\
                                           collection_name=data['name'], \
                                           collection_type=data['type'])
                    if collection_exists:
                        data[check_column] = True
                    else:
                        data[check_column] = False
                    return data
                else:
                    raise ValueError(
                        'Missing or empty required column name or type')

            else:
                raise ValueError(
                    'table {0} not supported yet'.format(table_name))
        except:
            raise
  def update_metadta_from_sample_attribute(self,experiment_igf_id=None,
                                           sample_attribute_names=('library_source',
                                                                   'library_strategy',
                                                                   'experiment_type')):
    '''
    A method for fetching experiment metadata from sample_attribute tables
    :param experiment_igf_id: An experiment igf id for updating only a selected experiment, default None for all experiments
    :param sample_attribute_names: A list of sample attribute names to look for experiment metadata,
                                   default: library_source, library_strategy, experiment_type
    '''
    try:
      sample_attribute_names = list(sample_attribute_names)
      db_connected=False
      base=self.base_adaptor
      base.start_session()
      db_connected=True
      query=base.session.\
            query(Experiment.experiment_igf_id).\
            distinct(Experiment.experiment_id).\
            join(Sample).\
            join(Sample_attribute).\
            filter(Sample.sample_id==Experiment.sample_id).\
            filter(Sample.sample_id==Sample_attribute.sample_id).\
            filter(Experiment.library_source=='UNKNOWN').\
            filter(Experiment.library_strategy=='UNKNOWN').\
            filter(Experiment.experiment_type=='UNKNOWN').\
            filter(Sample_attribute.attribute_value.notin_('UNKNOWN')).\
            filter(Sample_attribute.attribute_name.in_(sample_attribute_names)) # base query for db lookup
      if experiment_igf_id is not None:
        query=query.filter(Experiment.experiment_igf_id==experiment_igf_id)     # look for specific experiment_igf_id

      exp_update_count=0
      exps=base.fetch_records(query, output_mode='object')                      # fetch exp records as generator expression
      for row in exps:
        experiment_id=row[0]
        ea=ExperimentAdaptor(**{'session':base.session})
        attributes=ea.fetch_sample_attribute_records_for_experiment_igf_id(experiment_igf_id=experiment_id, 
                                                                output_mode='object',
                                                                attribute_list=sample_attribute_names)
        exp_update_data=dict()
        for attribute_row in attributes:
          exp_update_data.update({attribute_row.attribute_name:attribute_row.attribute_value})

        if len(exp_update_data.keys())>0:
          exp_update_count+=1
          ea.update_experiment_records_by_igf_id(experiment_igf_id=experiment_id,
                                                 update_data=exp_update_data,
                                                 autosave=False)                # update experiment entry if attribute records are found

      base.commit_session()
      base.close_session()
      db_connected=False
      if self.log_slack:
        message='Update {0} experiments from sample attribute records'.\
                format(exp_update_count)
        self.igf_slack.post_message_to_channel(message=message,
                                               reaction='pass')
    except Exception as e:
      if db_connected:
        base.rollback_session()
        base.close_session()
        message='Error while updating experiment records: {0}'.format(e)
        warnings.warn(message)
        if self.log_slack:
          self.igf_slack.post_message_to_channel(message=message,
                                                 reaction='fail')
      raise
Example #15
0
 def setUp(self):
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     base.start_session()
     project_data = [{
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X',
     }]
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     sample_data = [
         {
             'sample_igf_id': 'IGF00001',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
             'library_strategy': 'RNA-SEQ',
             'experiment_type': 'POLYA-RNA'
         },
         {
             'sample_igf_id': 'IGF00003',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
             'experiment_type': 'POLYA-RNA'
         },
         {
             'sample_igf_id': 'IGF00002',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         },
     ]
     sa = SampleAdaptor(**{'session': base.session})
     sa.store_sample_and_attribute_data(data=sample_data)
     experiment_data = [
         {
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'sample_igf_id': 'IGF00001',
             'experiment_igf_id': 'IGF00001_HISEQ4000',
             'library_name': 'IGF00001'
         },
         {
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'sample_igf_id': 'IGF00003',
             'experiment_igf_id': 'IGF00003_HISEQ4000',
             'library_name': 'IGF00001'
         },
         {
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'sample_igf_id': 'IGF00002',
             'experiment_igf_id': 'IGF00002_HISEQ4000',
             'library_name': 'IGF00002'
         },
     ]
     ea = ExperimentAdaptor(**{'session': base.session})
     ea.store_project_and_attribute_data(data=experiment_data)
     pipeline_data = [{
         "pipeline_name": "alignment",
         "pipeline_db": "sqlite:////data/aln.db",
         "pipeline_init_conf": {
             "input_dir": "data/fastq_dir/",
             "output_dir": "data"
         },
         "pipeline_run_conf": {
             "output_dir": "data"
         }
     }]
     pl = PipelineAdaptor(**{'session': base.session})
     pl.store_pipeline_data(data=pipeline_data)
     pipeline_seed_data = [
         {
             'pipeline_name': 'alignment',
             'seed_id': '1',
             'seed_table': 'experiment'
         },
     ]
     pl.create_pipeline_seed(data=pipeline_seed_data)
     base.close_session()
 def setUp(self):
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     base.start_session()
     self.session_class = base.get_session_class()
     project_data = [{
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X',
     }]
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     sample_data = [
         {
             'sample_igf_id': 'IGF00001',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
             'library_strategy': 'RNA-SEQ',
             'experiment_type': 'POLYA-RNA'
         },
         {
             'sample_igf_id': 'IGF00003',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
             'experiment_type': 'POLYA-RNA'
         },
         {
             'sample_igf_id': 'IGF00002',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         },
     ]
     sa = SampleAdaptor(**{'session': base.session})
     sa.store_sample_and_attribute_data(data=sample_data)
     experiment_data = [
         {
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'sample_igf_id': 'IGF00001',
             'experiment_igf_id': 'IGF00001_HISEQ4000',
             'library_name': 'IGF00001'
         },
         {
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'sample_igf_id': 'IGF00003',
             'experiment_igf_id': 'IGF00003_HISEQ4000',
             'library_name': 'IGF00001'
         },
         {
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'sample_igf_id': 'IGF00002',
             'experiment_igf_id': 'IGF00002_HISEQ4000',
             'library_name': 'IGF00002'
         },
     ]
     ea = ExperimentAdaptor(**{'session': base.session})
     ea.store_project_and_attribute_data(data=experiment_data)
     base.close_session()
Example #17
0
 def setUp(self):
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     base.start_session()
     platform_data = [
         {
             "platform_igf_id": "M03291",
             "model_name": "MISEQ",
             "vendor_name": "ILLUMINA",
             "software_name": "RTA",
             "software_version": "RTA1.18.54"
         },
     ]
     flowcell_rule_data = [{
         "platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"
     }]
     pl = PlatformAdaptor(**{'session': base.session})
     pl.store_platform_data(data=platform_data)
     pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
     project_data = [{'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq'}]
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     sample_data = [{
         'sample_igf_id': 'IGF103923',
         'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq',
         'species_name': 'HG38'
     }]
     sa = SampleAdaptor(**{'session': base.session})
     sa.store_sample_and_attribute_data(data=sample_data)
     seqrun_data = [
         {
             'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
             'flowcell_id': '000000000-BRN47',
             'platform_igf_id': 'M03291',
             'flowcell': 'MISEQ'
         },
     ]
     sra = SeqrunAdaptor(**{'session': base.session})
     sra.store_seqrun_and_attribute_data(data=seqrun_data)
     pipeline_data = [
         {
             "pipeline_name": "PrimaryAnalysis",
             "pipeline_db": "sqlite:////bcl2fastq.db"
         },
         {
             "pipeline_name": "DemultiplexIlluminaFastq",
             "pipeline_db": "sqlite:////bcl2fastq.db"
         },
     ]
     pla = PipelineAdaptor(**{'session': base.session})
     pla.store_pipeline_data(data=pipeline_data)
     file_data = [
         {
             'file_path': '/path/S20180405S_S1_L001_R1_001.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1528121404'
         },
         {
             'file_path': '/path/S20180405S_S1_L001_R2_001.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1467047580'
         },
         {
             'file_path': '/path/S20180405S_S3_L001_R2_001.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1467047580'
         },
     ]
     fa = FileAdaptor(**{'session': base.session})
     fa.store_file_and_attribute_data(data=file_data)
     collection_data = [
         {
             'name': 'IGF103923_MISEQ_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'table': 'run'
         },
         {
             'name': 'IGF103923_MISEQ1_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'table': 'run'
         },
     ]
     collection_files_data = [
         {
             'name': 'IGF103923_MISEQ_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'file_path': '/path/S20180405S_S1_L001_R1_001.fastq.gz'
         },
         {
             'name': 'IGF103923_MISEQ_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'file_path': '/path/S20180405S_S1_L001_R2_001.fastq.gz'
         },
         {
             'name': 'IGF103923_MISEQ1_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'file_path': '/path/S20180405S_S3_L001_R2_001.fastq.gz'
         },
     ]
     ca = CollectionAdaptor(**{'session': base.session})
     ca.store_collection_and_attribute_data(data=collection_data)
     ca.create_collection_group(data=collection_files_data)
     experiment_data = [{
         'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq',
         'sample_igf_id': 'IGF103923',
         'experiment_igf_id': 'IGF103923_MISEQ',
         'library_name': 'IGF103923',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'TENX-TRANSCRIPTOME-3P',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ'
     }, {
         'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq',
         'sample_igf_id': 'IGF103923',
         'experiment_igf_id': 'IGF103923_MISEQ1',
         'library_name': 'IGF103923_1',
         'library_source': 'GENOMIC_SINGLE_CELL',
         'library_strategy': 'WGS',
         'experiment_type': 'UNKNOWN',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ'
     }]
     ea = ExperimentAdaptor(**{'session': base.session})
     ea.store_project_and_attribute_data(data=experiment_data)
     run_data = [{
         'experiment_igf_id': 'IGF103923_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'IGF103923_MISEQ_000000000-BRN47_1',
         'lane_number': '1'
     }, {
         'experiment_igf_id': 'IGF103923_MISEQ1',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'IGF103923_MISEQ1_000000000-BRN47_1',
         'lane_number': '1'
     }]
     ra = RunAdaptor(**{'session': base.session})
     ra.store_run_and_attribute_data(data=run_data)
     base.close_session()
 def setUp(self):
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.drop_all(self.engine)
     if os.path.exists(self.dbname):
         os.remove(self.dbname)
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     base.start_session()
     # PLATFORM
     platform_data = [{
         "platform_igf_id": "M03291",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"
     }]
     flowcell_rule_data = [{
         "platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"
     }]
     pl = PlatformAdaptor(**{'session': base.session})
     pl.store_platform_data(data=platform_data)
     pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
     # SEQRUN
     seqrun_data = [{
         'seqrun_igf_id': '180416_M03291_0139_000000000-TEST',
         'flowcell_id': '000000000-TEST',
         'platform_igf_id': 'M03291',
         'flowcell': 'MISEQ',
     }, {
         'seqrun_igf_id': '180416_M03291_0140_000000000-TEST',
         'flowcell_id': '000000000-TEST',
         'platform_igf_id': 'M03291',
         'flowcell': 'MISEQ',
     }]
     sra = SeqrunAdaptor(**{'session': base.session})
     sra.store_seqrun_and_attribute_data(data=seqrun_data)
     # PROJECT
     project_data = [{'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq'}]
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     # SAMPLE
     sample_data = [{
         'sample_igf_id': 'IGF00123',
         'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq'
     }, {
         'sample_igf_id': 'IGF00124',
         'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq'
     }]
     sa = SampleAdaptor(**{'session': base.session})
     sa.store_sample_and_attribute_data(data=sample_data)
     # EXPERIMENT
     experiment_data = [{
         'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq',
         'sample_igf_id': 'IGF00123',
         'experiment_igf_id': 'IGF00123_MISEQ',
         'library_name': 'IGF00123',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'POLYA-RNA',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ',
         'singlecell_chemistry': 'TENX'
     }, {
         'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq',
         'sample_igf_id': 'IGF00124',
         'experiment_igf_id': 'IGF00124_MISEQ',
         'library_name': 'IGF00124',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'POLYA-RNA',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ',
         'singlecell_chemistry': 'TENX'
     }]
     ea = ExperimentAdaptor(**{'session': base.session})
     ea.store_project_and_attribute_data(data=experiment_data)
     # RUN
     run_data = [{
         'experiment_igf_id': 'IGF00123_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-TEST',
         'run_igf_id': 'IGF00123_MISEQ_000000000-TEST_1',
         'lane_number': '1'
     }]
     ra = RunAdaptor(**{'session': base.session})
     ra.store_run_and_attribute_data(data=run_data)
     # PIPELINE
     pipeline_data = [{
         "pipeline_name": "PrimaryAnalysis",
         "pipeline_db": "sqlite:////aln.db",
     }, {
         "pipeline_name": "DemultiplexingFastq",
         "pipeline_db": "sqlite:////fastq.db",
     }]
     pipeline_seed_data = [
         {
             'pipeline_name': 'PrimaryAnalysis',
             'seed_id': 1,
             'seed_table': 'experiment'
         },
         {
             'pipeline_name': 'PrimaryAnalysis',
             'seed_id': 2,
             'seed_table': 'experiment'
         },
         {
             'pipeline_name': 'DemultiplexingFastq',
             'seed_id': 1,
             'seed_table': 'seqrun'
         },
         {
             'pipeline_name': 'DemultiplexingFastq',
             'seed_id': 2,
             'seed_table': 'seqrun'
         },
     ]
     update_data = [{
         'pipeline_name': 'PrimaryAnalysis',
         'seed_id': 2,
         'seed_table': 'experiment',
         'status': 'FINISHED'
     }, {
         'pipeline_name': 'DemultiplexingFastq',
         'seed_id': 2,
         'seed_table': 'seqrun',
         'status': 'FINISHED'
     }]
     pla = PipelineAdaptor(**{'session': base.session})
     pla.store_pipeline_data(data=pipeline_data)
     pla.create_pipeline_seed(data=pipeline_seed_data)
     pla.update_pipeline_seed(update_data)
     base.close_session()