Ejemplo n.º 1
0
 def setUp(self):
     self.dbconfig = 'data/dbconfig.json'
     self.platform_json = 'data/platform_db_data.json'
     self.seqrun_json = 'data/seqrun_db_data.json'
     self.pipeline_json = 'data/pipeline_data.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     base.start_session()
     # load platform data
     pl = PlatformAdaptor(**{'session': base.session})
     pl.store_platform_data(data=read_json_data(self.platform_json))
     # load seqrun data
     sra = SeqrunAdaptor(**{'session': base.session})
     sra.store_seqrun_and_attribute_data(
         data=read_json_data(self.seqrun_json))
     # load platform data
     pla = PipelineAdaptor(**{'session': base.session})
     pla.store_pipeline_data(data=read_json_data(self.pipeline_json))
     pipeline_seed_data = [
         {
             'pipeline_name': 'demultiplexing_fastq',
             'seed_id': '1',
             'seed_table': 'seqrun'
         },
     ]
     pla.create_pipeline_seed(data=pipeline_seed_data)
     base.close_session()
  def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam=read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname=dbparam['dbname']
    Base.metadata.drop_all(self.engine)
    if os.path.exists(self.dbname):
      os.remove(self.dbname)
    Base.metadata.create_all(self.engine)
    self.session_class=base.get_session_class()

    base = BaseAdaptor(**{'session_class':self.session_class})
    base.start_session()
    platform_data=[{ "platform_igf_id" : "M001",
                     "model_name" : "MISEQ" ,
                     "vendor_name" : "ILLUMINA" ,
                     "software_name" : "RTA",
                     "software_version" : "RTA1.18.54"}]                        # platform data
    flowcell_rule_data=[{"platform_igf_id":"M001",
                         "flowcell_type":"MISEQ",
                         "index_1":"NO_CHANGE",
                         "index_2":"NO_CHANGE"}]                                # flowcell rule data
    pl=PlatformAdaptor(**{'session':base.session})
    pl.store_platform_data(data=platform_data)                                  # loading platform data
    pl.store_flowcell_barcode_rule(data=flowcell_rule_data)                     # loading flowcell rules data
    project_data=[{'project_igf_id':'ProjectA'}]                                # project data
    pa=ProjectAdaptor(**{'session':base.session})
    pa.store_project_and_attribute_data(data=project_data)                      # load project data
    sample_data=[{'sample_igf_id':'SampleA',
                  'project_igf_id':'ProjectA'}]                                 # sample data
    sa=SampleAdaptor(**{'session':base.session})
    sa.store_sample_and_attribute_data(data=sample_data)                        # store sample data
    seqrun_data=[{'seqrun_igf_id':'SeqrunA', 
                  'flowcell_id':'000000000-D0YLK', 
                  'platform_igf_id':'M001',
                  'flowcell':'MISEQ'}]                                          # seqrun data
    sra=SeqrunAdaptor(**{'session':base.session})
    sra.store_seqrun_and_attribute_data(data=seqrun_data)                       # load seqrun data
    experiment_data=[{'experiment_igf_id':'ExperimentA',
                      'sample_igf_id':'SampleA',
                      'library_name':'SampleA',
                      'platform_name':'MISEQ',
                      'project_igf_id':'ProjectA'}]                             # experiment data
    ea=ExperimentAdaptor(**{'session':base.session})
    ea.store_project_and_attribute_data(data=experiment_data)                   # load experiment data
    base.commit_session()
    base.close_session()
 def test_fetch_flowcell_barcode_rules_for_seqrun(self):
   sra=SeqrunAdaptor(**{'session_class': self.session_class})
   sra.start_session()
   sra_data=sra.fetch_flowcell_barcode_rules_for_seqrun(seqrun_igf_id='170101_K00001_0001_AHABCDEFGH')
   sra.close_session()
   sra_data=sra_data.to_dict(orient='records')[0]
   self.assertEqual(sra_data['index_2'],'REVCOMP')
Ejemplo n.º 4
0
def load_new_seqrun_data(data_file, dbconfig):
  '''
  A method for loading new data for seqrun table
  '''
  try:
    formatted_data=read_json_data(data_file)
    dbparam=read_dbconf_json(dbconfig)
    sr=SeqrunAdaptor(**dbparam)
    sr.start_session()
    sr.store_seqrun_and_attribute_data(data=formatted_data)
    sr.close_session()
  except:
    raise
 def setUp(self):
     self.dbconfig='data/dbconfig.json'
     self.platform_json='data/platform_db_data.json'
     self.seqrun_json='data/seqrun_db_data.json'
     self.pipeline_json='data/pipeline_data.json'
     self.flowcell_rules_json='data/flowcell_rules.json'
     dbparam=read_dbconf_json(self.dbconfig)
     base=BaseAdaptor(**dbparam)
     self.engine=base.engine
     self.dbname=dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class=base.get_session_class()
     base.start_session()
     # load platform data
     pl=PlatformAdaptor(**{'session':base.session})
     pl.store_platform_data(data=read_json_data(self.platform_json))
     pl.store_flowcell_barcode_rule(data=read_json_data(self.flowcell_rules_json))
     # load seqrun data
     sra=SeqrunAdaptor(**{'session':base.session})
     sra.store_seqrun_and_attribute_data(data=read_json_data(self.seqrun_json))
     base.close_session()
Ejemplo n.º 6
0
    def test_reset_pipeline_seed_for_rerun(self):
        base = BaseAdaptor(**{'session_class': self.session_class})
        base.start_session()
        sra = SeqrunAdaptor(**{'session': base.session})
        seqrun = sra.fetch_seqrun_records_igf_id(
            seqrun_igf_id='171003_M00001_0089_000000000-TEST')
        pp = PipelineAdaptor(**{'session': base.session})
        pipeline = pp.fetch_pipeline_records_pipeline_name(
            'demultiplexing_fastq')
        pipe_seed = pp.fetch_pipeline_seed(pipeline_id=pipeline.pipeline_id,
                                           seed_id=seqrun.seqrun_id,
                                           seed_table='seqrun')
        self.assertEqual(pipe_seed.status, 'SEEDED')
        pp.update_pipeline_seed(data=[{
            'pipeline_id': pipeline.pipeline_id,
            'seed_id': seqrun.seqrun_id,
            'seed_table': 'seqrun',
            'status': 'FINISHED',
        }])
        pipe_seed2 = pp.fetch_pipeline_seed(pipeline_id=pipeline.pipeline_id,
                                            seed_id=seqrun.seqrun_id,
                                            seed_table='seqrun')
        self.assertEqual(pipe_seed2.status, 'FINISHED')
        base.close_session()

        with open(self.seqrun_input_list, 'w') as fp:
            fp.write('171003_M00001_0089_000000000-TEST')

        mps = Modify_pipeline_seed(igf_id_list=self.seqrun_input_list,
                                   table_name='seqrun',
                                   pipeline_name='demultiplexing_fastq',
                                   dbconfig_file=self.dbconfig,
                                   log_slack=False,
                                   log_asana=False,
                                   clean_up=True)
        mps.reset_pipeline_seed_for_rerun(seeded_label='SEEDED')

        base.start_session()
        sra = SeqrunAdaptor(**{'session': base.session})
        seqrun = sra.fetch_seqrun_records_igf_id(
            seqrun_igf_id='171003_M00001_0089_000000000-TEST')
        pp = PipelineAdaptor(**{'session': base.session})
        pipeline = pp.fetch_pipeline_records_pipeline_name(
            'demultiplexing_fastq')
        pipe_seed = pp.fetch_pipeline_seed(pipeline_id=pipeline.pipeline_id,
                                           seed_id=seqrun.seqrun_id,
                                           seed_table='seqrun')
        self.assertEqual(pipe_seed.status, 'SEEDED')
        base.close_session()
Ejemplo n.º 7
0
    def test_load_seqrun_files_to_db(self):
        valid_seqrun_dir = find_new_seqrun_dir(path=self.path,
                                               dbconfig=self.dbconfig)
        new_seqrun_and_md5 = calculate_file_md5(seqrun_info=valid_seqrun_dir,
                                                md5_out=self.md5_out_path,
                                                seqrun_path=self.path)
        load_seqrun_files_to_db(seqrun_info=valid_seqrun_dir,
                                seqrun_md5_info=new_seqrun_and_md5,
                                dbconfig=self.dbconfig)

        # check in db
        dbparam = None
        with open(self.dbconfig, 'r') as json_data:
            dbparam = json.load(json_data)
        sra = SeqrunAdaptor(**dbparam)
        sra.start_session()
        sra_data = sra.fetch_seqrun_records_igf_id(seqrun_igf_id='seqrun1')
        sra.close_session()
        self.assertEqual(sra_data.flowcell_id, 'HXXXXXXXX')

        seed_pipeline_table_for_new_seqrun(
            pipeline_name='demultiplexing_fastq', dbconfig=self.dbconfig)
        # check in db
        dbparam = None
        with open(self.dbconfig, 'r') as json_data:
            dbparam = json.load(json_data)

        base = BaseAdaptor(**dbparam)
        base.start_session()
        seeds=base.fetch_records(query=base.session.query(Seqrun.seqrun_igf_id).\
                                       join(Pipeline_seed, Pipeline_seed.seed_id==Seqrun.seqrun_id).\
                                       join(Pipeline, Pipeline.pipeline_id==Pipeline_seed.pipeline_id).\
                                       filter(Pipeline.pipeline_name=='demultiplexing_fastq').\
                                       filter(Pipeline_seed.seed_table=='seqrun'), output_mode='object')
        base.close_session()
        self.assertTrue('seqrun1' in [s.seqrun_igf_id for s in seeds])
Ejemplo n.º 8
0
    def setUp(self):
        self.path = 'data/seqrun_dir'
        self.dbconfig = 'data/dbconfig.json'
        self.md5_out_path = 'data/md5_dir'
        self.pipeline_name = 'demultiplexing_fastq'

        seqrun_json = 'data/seqrun_db_data.json'
        platform_json = 'data/platform_db_data.json'
        pipeline_json = 'data/pipeline_data.json'

        os.mkdir(self.md5_out_path)
        dbparam = None
        with open(self.dbconfig, 'r') as json_data:
            dbparam = json.load(json_data)
        base = BaseAdaptor(**dbparam)
        self.engine = base.engine
        self.dbname = dbparam['dbname']
        self.pipeline_name = ''
        Base.metadata.create_all(self.engine)
        base.start_session()
        user_data = [
            {
                'name': 'user1',
                'email_id': '*****@*****.**',
                'username': '******'
            },
        ]
        ua = UserAdaptor(**{'session': base.session})
        ua.store_user_data(data=user_data)
        project_data = [{
            'project_igf_id': 'project_1',
            'project_name': 'test_22-8-2017_rna',
            'description': 'Its project 1',
            'project_deadline': 'Before August 2017',
            'comments': 'Some samples are treated with drug X',
        }]
        pa = ProjectAdaptor(**{'session': base.session})
        pa.store_project_and_attribute_data(data=project_data)
        project_user_data = [{
            'project_igf_id': 'project_1',
            'email_id': '*****@*****.**',
            'data_authority': True
        }]
        pa.assign_user_to_project(data=project_user_data)
        sample_data = [
            {
                'sample_igf_id': 'IGF0001',
                'project_igf_id': 'project_1',
            },
            {
                'sample_igf_id': 'IGF0002',
                'project_igf_id': 'project_1',
            },
            {
                'sample_igf_id': 'IGF0003',
                'project_igf_id': 'project_1',
            },
        ]
        sa = SampleAdaptor(**{'session': base.session})
        sa.store_sample_and_attribute_data(data=sample_data)
        base.commit_session()
        with open(pipeline_json,
                  'r') as json_data:  # store pipeline data to db
            pipeline_data = json.load(json_data)
            pa = PipelineAdaptor(**{'session': base.session})
            pa.store_pipeline_data(data=pipeline_data)

        with open(platform_json,
                  'r') as json_data:  # store platform data to db
            platform_data = json.load(json_data)
            pl = PlatformAdaptor(**{'session': base.session})
            pl.store_platform_data(data=platform_data)

        with open(seqrun_json, 'r') as json_data:  # store seqrun data to db
            seqrun_data = json.load(json_data)
            sra = SeqrunAdaptor(**{'session': base.session})
            sra.store_seqrun_and_attribute_data(data=seqrun_data)
            base.close_session()
Ejemplo n.º 9
0
    def run(self):
        try:
            igf_session_class = self.param_required('igf_session_class')
            seqrun_igf_id = self.param_required('seqrun_igf_id')
            seqrun_local_dir = self.param_required('seqrun_local_dir')
            base_work_dir = self.param_required('base_work_dir')
            samplesheet_filename = self.param('samplesheet_filename')
            index2_label = self.param('index2_label_in_db')
            revcomp_label = self.param('revcomp_label')
            singlecell_tag = self.param('singlecell_tag')
            adapter_trim_check = self.param('adapter_trim_check')
            adapter_section = self.param('adapter_section')
            read1_adapter_label = self.param('read1_adapter_label')
            read2_adapter_label = self.param('read2_adapter_label')
            project_type = self.param('project_type')

            job_name = self.job_name()
            work_dir = \
              os.path.join(\
                base_work_dir,
                seqrun_igf_id,
                job_name)                                                             # get work directory name
            if not os.path.exists(work_dir):
                os.makedirs(work_dir, mode=0o770)  # create work directory

            output_file = \
              os.path.join(\
                work_dir,
                samplesheet_filename)                                                 # get name of the output file
            if os.path.exists(output_file):
                raise IOError('seqrun: {0}, reformatted samplesheet {1} already present'.\
                              format(seqrun_igf_id,output_file))

            samplesheet_file = \
              os.path.join(\
                seqrun_local_dir,
                seqrun_igf_id,
                samplesheet_filename)
            if not os.path.exists(samplesheet_file):
                raise IOError('seqrun: {0}, samplesheet file {1} not found'.\
                              format(seqrun_igf_id,samplesheet_file))

            samplesheet_sc = \
              SampleSheet(infile=samplesheet_file)                                    # read samplesheet for single cell check
            samplesheet_sc.\
              filter_sample_data(\
                condition_key='Description',
                condition_value=singlecell_tag,
                method='include')                                                     # get 10X samplesheet
            if len(samplesheet_sc._data) > 0:
                project_type = singlecell_tag  # check if 10x samples are present in samplesheet

            samplesheet = \
              SampleSheet(infile=samplesheet_file)                                    # read samplesheet
            samplesheet.\
              filter_sample_data(\
                condition_key='Description',
                condition_value=singlecell_tag,
                method='exclude')                                                     # filter 10X samplesheet

            if adapter_trim_check:
                read1_val = \
                  samplesheet.\
                    check_sample_header(\
                      section=adapter_section,
                      condition_key=read1_adapter_label)
                read2_val = \
                  samplesheet.\
                    check_sample_header(\
                      section=adapter_section,
                      condition_key=read2_adapter_label)
                if read1_val == 0 or read2_val == 0:
                    message = \
                      'seqrun {0} samplesheet does not have adapter trim option, read1: {1}, read2:{2}'.\
                      format(seqrun_igf_id,read1_val,read2_val)
                    self.post_message_to_slack(message, reaction='pass')
                    self.comment_asana_task(
                        task_name=seqrun_igf_id, comment=message
                    )  # send info about adapter trip to slack and asana

            sa = SeqrunAdaptor(**{'session_class': igf_session_class})
            sa.start_session()
            rules_data = \
              sa.fetch_flowcell_barcode_rules_for_seqrun(seqrun_igf_id)               # convert index based on barcode rules
            sa.close_session()

            rules_data_set = rules_data.to_dict(
                orient='records')  # convert dataframe to dictionary
            if len(rules_data_set) > 0:
                rules_data = rules_data_set[0]  # consider only the first rule
                if rules_data[index2_label] == revcomp_label:
                    samplesheet.\
                      get_reverse_complement_index(index_field='index2')                  # reverse complement index2 based on the rules
            else:
                message = \
                  'no rules found for seqrun {0}, using original samplesheet'.\
                  format(seqrun_igf_id)
                self.post_message_to_slack(message, reaction='pass')
                self.comment_asana_task(task_name=seqrun_igf_id,
                                        comment=message)

            if len(samplesheet_sc._data) > 0:  # merge 10x samplesheet
                samplesheet._data.extend(samplesheet_sc._data)
                message = \
                  'seqrun: {0}, merging 10X samples with reformatted samplesheet'.\
                  format(seqrun_igf_id)
                self.post_message_to_slack(message, reaction='pass')
                self.comment_asana_task(task_name=seqrun_igf_id,
                                        comment=message)

            samplesheet.print_sampleSheet(outfile=output_file)
            self.param('dataflow_params', {
                'samplesheet': output_file,
                'project_type': project_type
            })
            message = \
              'seqrun: {0}, reformatted samplesheet:{1}'.\
                format(\
                  seqrun_igf_id,
                  output_file)
            self.post_message_to_slack(message, reaction='pass')
            self.comment_asana_task(task_name=seqrun_igf_id, comment=message)
        except Exception as e:
            message = \
              'seqrun: {2}, Error in {0}: {1}'.\
                format(\
                  self.__class__.__name__,
                  e,
                  seqrun_igf_id)
            self.warning(message)
            self.post_message_to_slack(
                message, reaction='fail')  # post msg to slack for failed jobs
            raise
 def setUp(self):
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     base.start_session()
     platform_data = [{
         "platform_igf_id": "M03291",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"
     }, {
         "platform_igf_id": "NB501820",
         "model_name": "NEXTSEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"
     }, {
         "platform_igf_id": "K00345",
         "model_name": "HISEQ4000",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"
     }]
     flowcell_rule_data = [{
         "platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 SR",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"
     }, {
         "platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 PE",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"
     }, {
         "platform_igf_id": "NB501820",
         "flowcell_type": "NEXTSEQ",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"
     }, {
         "platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"
     }]
     pl = PlatformAdaptor(**{'session': base.session})
     pl.store_platform_data(data=platform_data)
     pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
     seqrun_data = [{
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'flowcell_id': '000000000-BRN47',
         'platform_igf_id': 'M03291',
         'flowcell': 'MISEQ',
     }, {
         'seqrun_igf_id': '180416_NB03291_013_000000001-BRN47',
         'flowcell_id': '000000001-BRN47',
         'platform_igf_id': 'NB501820',
         'flowcell': 'NEXTSEQ',
     }]
     sra = SeqrunAdaptor(**{'session': base.session})
     sra.store_seqrun_and_attribute_data(data=seqrun_data)
     project_data = [{'project_igf_id': 'projectA'}]
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     sample_data = [
         {
             'sample_igf_id': 'sampleA',
             'project_igf_id': 'projectA',
             'species_name': 'HG38'
         },
         {
             'sample_igf_id': 'sampleB',
             'project_igf_id': 'projectA',
             'species_name': 'UNKNOWN'
         },
     ]
     sa = SampleAdaptor(**{'session': base.session})
     sa.store_sample_and_attribute_data(data=sample_data)
     experiment_data = [
         {
             'project_igf_id': 'projectA',
             'sample_igf_id': 'sampleA',
             'experiment_igf_id': 'sampleA_MISEQ',
             'library_name': 'sampleA',
             'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
             'library_strategy': 'RNA-SEQ',
             'experiment_type': 'TENX-TRANSCRIPTOME-3P',
             'library_layout': 'PAIRED',
             'platform_name': 'MISEQ',
         },
         {
             'project_igf_id': 'projectA',
             'sample_igf_id': 'sampleA',
             'experiment_igf_id': 'sampleA_NEXTSEQ',
             'library_name': 'sampleA',
             'library_source': 'UNKNOWN',
             'library_strategy': 'RNA-SEQ',
             'experiment_type': 'TENX-TRANSCRIPTOME-3P',
             'library_layout': 'PAIRED',
             'platform_name': 'NEXTSEQ',
         },
         {
             'project_igf_id': 'projectA',
             'sample_igf_id': 'sampleB',
             'experiment_igf_id': 'sampleB_MISEQ',
             'library_name': 'sampleB',
             'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
             'library_strategy': 'RNA-SEQ',
             'experiment_type': 'TENX-TRANSCRIPTOME-3P',
             'library_layout': 'PAIRED',
             'platform_name': 'MISEQ',
         },
     ]
     ea = ExperimentAdaptor(**{'session': base.session})
     ea.store_project_and_attribute_data(data=experiment_data)
     run_data = [{
         'experiment_igf_id': 'sampleA_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'sampleA_MISEQ_000000000-BRN47_1',
         'lane_number': '1'
     }, {
         'experiment_igf_id': 'sampleA_NEXTSEQ',
         'seqrun_igf_id': '180416_NB03291_013_000000001-BRN47',
         'run_igf_id': 'sampleA_NEXTSEQ_000000001-BRN47_2',
         'lane_number': '2'
     }, {
         'experiment_igf_id': 'sampleB_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'sampleB_MISEQ_HVWN7BBXX_1',
         'lane_number': '1'
     }]
     ra = RunAdaptor(**{'session': base.session})
     ra.store_run_and_attribute_data(data=run_data)
     file_data = [
         {
             'file_path':
             '/path/sampleA_MISEQ_000000000-BRN47_1_R1.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1528121404',
         },
         {
             'file_path':
             '/path/sampleA_NEXTSEQ_000000001-BRN47_2_R1.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1528121404',
         },
         {
             'file_path': '/path/sampleB_MISEQ_HVWN7BBXX_1_R1.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1528121404',
         },
     ]
     fa = FileAdaptor(**{'session': base.session})
     fa.store_file_and_attribute_data(data=file_data)
     collection_data = [{
         'name': 'sampleA_MISEQ_000000000-BRN47_1',
         'type': 'demultiplexed_fastq',
         'table': 'run'
     }, {
         'name': 'sampleA_NEXTSEQ_000000001-BRN47_2',
         'type': 'demultiplexed_fastq',
         'table': 'run'
     }, {
         'name': 'sampleB_MISEQ_HVWN7BBXX_1',
         'type': 'demultiplexed_fastq',
         'table': 'run'
     }]
     collection_files_data = [{
         'name':
         'sampleA_MISEQ_000000000-BRN47_1',
         'type':
         'demultiplexed_fastq',
         'file_path':
         '/path/sampleA_MISEQ_000000000-BRN47_1_R1.fastq.gz'
     }, {
         'name':
         'sampleA_NEXTSEQ_000000001-BRN47_2',
         'type':
         'demultiplexed_fastq',
         'file_path':
         '/path/sampleA_NEXTSEQ_000000001-BRN47_2_R1.fastq.gz'
     }, {
         'name':
         'sampleB_MISEQ_HVWN7BBXX_1',
         'type':
         'demultiplexed_fastq',
         'file_path':
         '/path/sampleB_MISEQ_HVWN7BBXX_1_R1.fastq.gz'
     }]
     ca = CollectionAdaptor(**{'session': base.session})
     ca.store_collection_and_attribute_data(data=collection_data)
     ca.create_collection_group(data=collection_files_data)
     base.close_session()
    }, {
        'run_igf_id': 'RunC',
        'experiment_igf_id': 'ExperimentA',
        'seqrun_igf_id': '180410_K00345_0063_AHWL7CBBXX',
        'lane_number': '1'
    }]  # run data
    base.start_session()
    pl = PlatformAdaptor(**{'session': base.session})
    pl.store_platform_data(data=platform_data)  # loading platform data
    pl.store_flowcell_barcode_rule(
        data=flowcell_rule_data)  # loading flowcell rules data
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)  # load project data
    sa = SampleAdaptor(**{'session': base.session})
    sa.store_sample_and_attribute_data(data=sample_data)  # store sample data
    sra = SeqrunAdaptor(**{'session': base.session})
    sra.store_seqrun_and_attribute_data(data=seqrun_data)  # load seqrun data
    ea = ExperimentAdaptor(**{'session': base.session})
    ea.store_project_and_attribute_data(
        data=experiment_data)  # load experiment data
    ra = RunAdaptor(**{'session': base.session})
    ra.store_run_and_attribute_data(data=run_data)  # load run data
    pipeline_data = [{
        "pipeline_name": "DemultiplexIlluminaFastq",
        "pipeline_db": "sqlite:////bcl2fastq.db",
    }]

    pipeline_seed_data = [
        {
            'pipeline_name': 'DemultiplexIlluminaFastq',
            'seed_id': 1,
    def setUp(self):
        self.dbconfig = 'data/dbconfig.json'
        dbparam = read_dbconf_json(self.dbconfig)
        base = BaseAdaptor(**dbparam)
        self.engine = base.engine
        self.dbname = dbparam['dbname']
        Base.metadata.drop_all(self.engine)
        if os.path.exists(self.dbname):
            os.remove(self.dbname)
        Base.metadata.create_all(self.engine)
        self.session_class = base.get_session_class()
        self.temp_work_dir = get_temp_dir()
        self.temp_base_dir = get_temp_dir()
        self.input_list = ['a.cram', 'a.vcf.gz', 'b.tar.gz']
        for file_name in self.input_list:
            file_path = os.path.join(self.temp_work_dir, file_name)
            with open(file_path, 'w') as fq:
                fq.write('AAAA')  # create input files

        base = BaseAdaptor(**{'session_class': self.session_class})
        base.start_session()
        platform_data = [{
            "platform_igf_id": "M001",
            "model_name": "MISEQ",
            "vendor_name": "ILLUMINA",
            "software_name": "RTA",
            "software_version": "RTA1.18.54"
        }]  # platform data
        flowcell_rule_data = [{
            "platform_igf_id": "M001",
            "flowcell_type": "MISEQ",
            "index_1": "NO_CHANGE",
            "index_2": "NO_CHANGE"
        }]  # flowcell rule data
        pl = PlatformAdaptor(**{'session': base.session})
        pl.store_platform_data(data=platform_data)  # loading platform data
        pl.store_flowcell_barcode_rule(
            data=flowcell_rule_data)  # loading flowcell rules data
        project_data = [{'project_igf_id': 'ProjectA'}]  # project data
        pa = ProjectAdaptor(**{'session': base.session})
        pa.store_project_and_attribute_data(
            data=project_data)  # load project data
        sample_data = [{
            'sample_igf_id': 'SampleA',
            'project_igf_id': 'ProjectA'
        }]  # sample data
        sa = SampleAdaptor(**{'session': base.session})
        sa.store_sample_and_attribute_data(
            data=sample_data)  # store sample data
        seqrun_data = [{
            'seqrun_igf_id': 'SeqrunA',
            'flowcell_id': '000000000-D0YLK',
            'platform_igf_id': 'M001',
            'flowcell': 'MISEQ'
        }]  # seqrun data
        sra = SeqrunAdaptor(**{'session': base.session})
        sra.store_seqrun_and_attribute_data(
            data=seqrun_data)  # load seqrun data
        experiment_data = [{
            'experiment_igf_id': 'ExperimentA',
            'sample_igf_id': 'SampleA',
            'library_name': 'SampleA',
            'platform_name': 'MISEQ',
            'project_igf_id': 'ProjectA'
        }]  # experiment data
        ea = ExperimentAdaptor(**{'session': base.session})
        ea.store_project_and_attribute_data(
            data=experiment_data)  # load experiment data
        run_data = [{
            'run_igf_id': 'RunA',
            'experiment_igf_id': 'ExperimentA',
            'seqrun_igf_id': 'SeqrunA',
            'lane_number': '1'
        }]  # run data
        ra = RunAdaptor(**{'session': base.session})
        ra.store_run_and_attribute_data(data=run_data)  # load run data
        base.commit_session()
        base.close_session()
Ejemplo n.º 13
0
    def setUp(self):
        self.dbconfig = 'data/dbconfig.json'
        dbparam = read_dbconf_json(self.dbconfig)
        base = BaseAdaptor(**dbparam)
        self.engine = base.engine
        self.dbname = dbparam['dbname']
        Base.metadata.create_all(self.engine)
        self.session_class = base.get_session_class()
        # load platform data
        platform_data=\
          [{"platform_igf_id" : "M03291" ,
            "model_name" : "MISEQ" ,
            "vendor_name" : "ILLUMINA" ,
            "software_name" : "RTA" ,
            "software_version" : "RTA1.18.54"
           },
           {"platform_igf_id" : "NB501820",
            "model_name" : "NEXTSEQ",
            "vendor_name" : "ILLUMINA",
            "software_name" : "RTA",
            "software_version" : "RTA2"
           },
           {"platform_igf_id" : "K00345",
            "model_name" : "HISEQ4000",
            "vendor_name" : "ILLUMINA",
            "software_name" : "RTA",
            "software_version" : "RTA2"
           }]

        flowcell_rule_data=\
          [{"platform_igf_id":"K00345",
            "flowcell_type":"HiSeq 3000/4000 SR",
            "index_1":"NO_CHANGE",
            "index_2":"NO_CHANGE"},
           {"platform_igf_id":"K00345",
            "flowcell_type":"HiSeq 3000/4000 PE",
            "index_1":"NO_CHANGE",
            "index_2":"REVCOMP"},
           {"platform_igf_id":"NB501820",
            "flowcell_type":"NEXTSEQ",
            "index_1":"NO_CHANGE",
            "index_2":"REVCOMP"},
           {"platform_igf_id":"M03291",
            "flowcell_type":"MISEQ",
            "index_1":"NO_CHANGE",
            "index_2":"NO_CHANGE"}]

        pl = PlatformAdaptor(**{'session_class': base.session_class})
        pl.start_session()
        pl.store_platform_data(data=platform_data)
        pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
        pl.close_session()

        # load project data

        project_data = [{'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA'}]
        pa = ProjectAdaptor(**{'session_class': base.session_class})
        pa.start_session()
        pa.store_project_and_attribute_data(data=project_data)
        pa.close_session()

        # load samples

        sample_data = [
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109792',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109793',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109794',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109795',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109796',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109797',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109797_1',
                'expected_read': 40000000
            },
        ]

        sa = SampleAdaptor(**{'session_class': base.session_class})
        sa.start_session()
        sa.store_sample_and_attribute_data(data=sample_data)
        sa.close_session()

        # load seqrun data

        seqrun_data = [{
            'flowcell_id': 'HV2GJBBXX',
            'platform_igf_id': 'K00345',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX'
        }]

        sra = SeqrunAdaptor(**{'session_class': base.session_class})
        sra.start_session()
        sra.store_seqrun_and_attribute_data(data=seqrun_data)
        sra.close_session()

        # load experiment data

        experiment_data=\
          [{'experiment_igf_id': 'IGF109792_HISEQ4000',
            'library_name': 'IGF109792',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109792',
           },
           {'experiment_igf_id': 'IGF109793_HISEQ4000',
            'library_name': 'IGF109793',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109793',
           },
           {'experiment_igf_id': 'IGF109794_HISEQ4000',
            'library_name': 'IGF109794',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109794',
           },
           {'experiment_igf_id': 'IGF109795_HISEQ4000',
            'library_name': 'IGF109795',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109795',
           },
           {'experiment_igf_id': 'IGF109796_HISEQ4000',
            'library_name': 'IGF109796',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109796',
           },
           {'experiment_igf_id': 'IGF109797_HISEQ4000',
            'library_name': 'IGF109797',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109797',
           },
          ]

        ea = ExperimentAdaptor(**{'session_class': base.session_class})
        ea.start_session()
        ea.store_project_and_attribute_data(data=experiment_data)
        ea.close_session()

        # load run data

        run_data=\
          [{'experiment_igf_id': 'IGF109792_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109792_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':288046541
           },
           {'experiment_igf_id': 'IGF109793_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109793_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':14666330
           },
           {'experiment_igf_id': 'IGF109794_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109794_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':5009143
           },
           {'experiment_igf_id': 'IGF109795_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109795_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':1391747
           },
           {'experiment_igf_id': 'IGF109796_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': '	IGF109796_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':1318008
           },
           {'experiment_igf_id': 'IGF109797_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109797_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':1216324
           },
          ]

        ra = RunAdaptor(**{'session_class': base.session_class})
        ra.start_session()
        ra.store_run_and_attribute_data(data=run_data)
        ra.close_session()
Ejemplo n.º 14
0
 def setUp(self):
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     base.start_session()
     platform_data = [{
         "platform_igf_id": "M00001",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"
     }, {
         "platform_igf_id": "NB500000",
         "model_name": "NEXTSEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"
     }, {
         "platform_igf_id": "K00000",
         "model_name": "HISEQ4000",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"
     }]
     flowcell_rule_data = [{
         "platform_igf_id": "K00000",
         "flowcell_type": "HiSeq 3000/4000 SR",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"
     }, {
         "platform_igf_id": "K00000",
         "flowcell_type": "HiSeq 3000/4000 PE",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"
     }, {
         "platform_igf_id": "NB500000",
         "flowcell_type": "NEXTSEQ",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"
     }, {
         "platform_igf_id": "M00001",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"
     }]
     pl = PlatformAdaptor(**{'session': base.session})
     pl.store_platform_data(data=platform_data)
     pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
     seqrun_data = [{
         'seqrun_igf_id': '171003_M00001_0089_000000000-TEST',
         'flowcell_id': '000000000-D0YLK',
         'platform_igf_id': 'M00001',
         'flowcell': 'MISEQ',
     }]
     sra = SeqrunAdaptor(**{'session': base.session})
     sra.store_seqrun_and_attribute_data(data=seqrun_data)
     seqrun = sra.fetch_seqrun_records_igf_id(
         seqrun_igf_id='171003_M00001_0089_000000000-TEST')
     pipeline_data = [{
         "pipeline_name": "demultiplexing_fastq",
         "pipeline_db": "sqlite:////data/bcl2fastq.db",
         "pipeline_init_conf": {
             "input_dir": "data/seqrun_dir/",
             "output_dir": "data"
         },
         "pipeline_run_conf": {
             "output_dir": "data"
         }
     }]
     pipeseed_data = [{
         "pipeline_name": "demultiplexing_fastq",
         "seed_table": "seqrun",
         "seed_id": seqrun.seqrun_id
     }]
     pp = PipelineAdaptor(**{'session': base.session})
     pp.store_pipeline_data(data=pipeline_data)
     pp.create_pipeline_seed(
         data=pipeseed_data,
         required_columns=['pipeline_id', 'seed_id', 'seed_table'])
     base.close_session()
     self.seqrun_input_list = 'data/reset_samplesheet_md5/seqrun_pipeline_reset_list.txt'
     with open(self.seqrun_input_list, 'w') as fp:
         fp.write('')
Ejemplo n.º 15
0
    def setUp(self):
        self.dbconfig = 'data/dbconfig.json'
        self.fastq_dir = 'data/collect_fastq_dir/sc_1_8'
        self.model_name = 'NEXTSEQ'
        self.flowcell_id = 'TESTABC'
        self.seqrun_igf_id = '171003_NB500000_0089_TESTABC'
        self.file_location = 'HPC_PROJECT'
        self.samplesheet_file = 'data/collect_fastq_dir/sc_1_8/SampleSheet.csv'
        self.samplesheet_filename = 'SampleSheet.csv'
        self.manifest_name = 'file_manifest.csv'
        dbparam = read_dbconf_json(self.dbconfig)
        base = BaseAdaptor(**dbparam)
        self.engine = base.engine
        self.dbname = dbparam['dbname']
        Base.metadata.create_all(self.engine)
        self.session_class = base.session_class
        base.start_session()
        platform_data = [{
            "platform_igf_id": "M00001",
            "model_name": "MISEQ",
            "vendor_name": "ILLUMINA",
            "software_name": "RTA",
            "software_version": "RTA1.18.54"
        }, {
            "platform_igf_id": "NB500000",
            "model_name": "NEXTSEQ",
            "vendor_name": "ILLUMINA",
            "software_name": "RTA",
            "software_version": "RTA2"
        }, {
            "platform_igf_id": "K00000",
            "model_name": "HISEQ4000",
            "vendor_name": "ILLUMINA",
            "software_name": "RTA",
            "software_version": "RTA2"
        }]
        flowcell_rule_data = [{
            "platform_igf_id": "K00000",
            "flowcell_type": "HiSeq 3000/4000 SR",
            "index_1": "NO_CHANGE",
            "index_2": "NO_CHANGE"
        }, {
            "platform_igf_id": "K00000",
            "flowcell_type": "HiSeq 3000/4000 PE",
            "index_1": "NO_CHANGE",
            "index_2": "REVCOMP"
        }, {
            "platform_igf_id": "NB500000",
            "flowcell_type": "NEXTSEQ",
            "index_1": "NO_CHANGE",
            "index_2": "REVCOMP"
        }, {
            "platform_igf_id": "M00001",
            "flowcell_type": "MISEQ",
            "index_1": "NO_CHANGE",
            "index_2": "NO_CHANGE"
        }]
        pl = PlatformAdaptor(**{'session': base.session})
        pl.store_platform_data(data=platform_data)
        pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
        project_data = [{
            'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
            'project_name': 'test_22-8-2017_rna',
            'description': 'Its project 1',
            'project_deadline': 'Before August 2017',
            'comments': 'Some samples are treated with drug X',
        }]
        pa = ProjectAdaptor(**{'session': base.session})
        pa.store_project_and_attribute_data(data=project_data)
        sample_data = [
            {
                'sample_igf_id': 'IGF00001',
                'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
            },
            {
                'sample_igf_id': 'IGF00002',
                'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
            },
        ]
        sa = SampleAdaptor(**{'session': base.session})
        sa.store_sample_and_attribute_data(data=sample_data)

        seqrun_data = [{
            'seqrun_igf_id': '171003_NB500000_0089_TESTABC',
            'flowcell_id': 'TESTABC',
            'platform_igf_id': 'NB500000',
            'flowcell': 'NEXTSEQ',
        }]
        sra = SeqrunAdaptor(**{'session': base.session})
        sra.store_seqrun_and_attribute_data(data=seqrun_data)
        base.close_session()
Ejemplo n.º 16
0
 def setUp(self):
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     base.start_session()
     platform_data = [
         {
             "platform_igf_id": "M03291",
             "model_name": "MISEQ",
             "vendor_name": "ILLUMINA",
             "software_name": "RTA",
             "software_version": "RTA1.18.54"
         },
     ]
     flowcell_rule_data = [{
         "platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"
     }]
     pl = PlatformAdaptor(**{'session': base.session})
     pl.store_platform_data(data=platform_data)
     pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
     project_data = [{'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq'}]
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     sample_data = [{
         'sample_igf_id': 'IGF103923',
         'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq',
         'species_name': 'HG38'
     }]
     sa = SampleAdaptor(**{'session': base.session})
     sa.store_sample_and_attribute_data(data=sample_data)
     seqrun_data = [
         {
             'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
             'flowcell_id': '000000000-BRN47',
             'platform_igf_id': 'M03291',
             'flowcell': 'MISEQ'
         },
     ]
     sra = SeqrunAdaptor(**{'session': base.session})
     sra.store_seqrun_and_attribute_data(data=seqrun_data)
     pipeline_data = [
         {
             "pipeline_name": "PrimaryAnalysis",
             "pipeline_db": "sqlite:////bcl2fastq.db"
         },
         {
             "pipeline_name": "DemultiplexIlluminaFastq",
             "pipeline_db": "sqlite:////bcl2fastq.db"
         },
     ]
     pla = PipelineAdaptor(**{'session': base.session})
     pla.store_pipeline_data(data=pipeline_data)
     file_data = [
         {
             'file_path': '/path/S20180405S_S1_L001_R1_001.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1528121404'
         },
         {
             'file_path': '/path/S20180405S_S1_L001_R2_001.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1467047580'
         },
         {
             'file_path': '/path/S20180405S_S3_L001_R2_001.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1467047580'
         },
     ]
     fa = FileAdaptor(**{'session': base.session})
     fa.store_file_and_attribute_data(data=file_data)
     collection_data = [
         {
             'name': 'IGF103923_MISEQ_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'table': 'run'
         },
         {
             'name': 'IGF103923_MISEQ1_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'table': 'run'
         },
     ]
     collection_files_data = [
         {
             'name': 'IGF103923_MISEQ_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'file_path': '/path/S20180405S_S1_L001_R1_001.fastq.gz'
         },
         {
             'name': 'IGF103923_MISEQ_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'file_path': '/path/S20180405S_S1_L001_R2_001.fastq.gz'
         },
         {
             'name': 'IGF103923_MISEQ1_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'file_path': '/path/S20180405S_S3_L001_R2_001.fastq.gz'
         },
     ]
     ca = CollectionAdaptor(**{'session': base.session})
     ca.store_collection_and_attribute_data(data=collection_data)
     ca.create_collection_group(data=collection_files_data)
     experiment_data = [{
         'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq',
         'sample_igf_id': 'IGF103923',
         'experiment_igf_id': 'IGF103923_MISEQ',
         'library_name': 'IGF103923',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'TENX-TRANSCRIPTOME-3P',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ'
     }, {
         'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq',
         'sample_igf_id': 'IGF103923',
         'experiment_igf_id': 'IGF103923_MISEQ1',
         'library_name': 'IGF103923_1',
         'library_source': 'GENOMIC_SINGLE_CELL',
         'library_strategy': 'WGS',
         'experiment_type': 'UNKNOWN',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ'
     }]
     ea = ExperimentAdaptor(**{'session': base.session})
     ea.store_project_and_attribute_data(data=experiment_data)
     run_data = [{
         'experiment_igf_id': 'IGF103923_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'IGF103923_MISEQ_000000000-BRN47_1',
         'lane_number': '1'
     }, {
         'experiment_igf_id': 'IGF103923_MISEQ1',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'IGF103923_MISEQ1_000000000-BRN47_1',
         'lane_number': '1'
     }]
     ra = RunAdaptor(**{'session': base.session})
     ra.store_run_and_attribute_data(data=run_data)
     base.close_session()
 def setUp(self):
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.drop_all(self.engine)
     if os.path.exists(self.dbname):
         os.remove(self.dbname)
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     base.start_session()
     # PLATFORM
     platform_data = [{
         "platform_igf_id": "M03291",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"
     }]
     flowcell_rule_data = [{
         "platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"
     }]
     pl = PlatformAdaptor(**{'session': base.session})
     pl.store_platform_data(data=platform_data)
     pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
     # SEQRUN
     seqrun_data = [{
         'seqrun_igf_id': '180416_M03291_0139_000000000-TEST',
         'flowcell_id': '000000000-TEST',
         'platform_igf_id': 'M03291',
         'flowcell': 'MISEQ',
     }, {
         'seqrun_igf_id': '180416_M03291_0140_000000000-TEST',
         'flowcell_id': '000000000-TEST',
         'platform_igf_id': 'M03291',
         'flowcell': 'MISEQ',
     }]
     sra = SeqrunAdaptor(**{'session': base.session})
     sra.store_seqrun_and_attribute_data(data=seqrun_data)
     # PROJECT
     project_data = [{'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq'}]
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     # SAMPLE
     sample_data = [{
         'sample_igf_id': 'IGF00123',
         'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq'
     }, {
         'sample_igf_id': 'IGF00124',
         'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq'
     }]
     sa = SampleAdaptor(**{'session': base.session})
     sa.store_sample_and_attribute_data(data=sample_data)
     # EXPERIMENT
     experiment_data = [{
         'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq',
         'sample_igf_id': 'IGF00123',
         'experiment_igf_id': 'IGF00123_MISEQ',
         'library_name': 'IGF00123',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'POLYA-RNA',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ',
         'singlecell_chemistry': 'TENX'
     }, {
         'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq',
         'sample_igf_id': 'IGF00124',
         'experiment_igf_id': 'IGF00124_MISEQ',
         'library_name': 'IGF00124',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'POLYA-RNA',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ',
         'singlecell_chemistry': 'TENX'
     }]
     ea = ExperimentAdaptor(**{'session': base.session})
     ea.store_project_and_attribute_data(data=experiment_data)
     # RUN
     run_data = [{
         'experiment_igf_id': 'IGF00123_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-TEST',
         'run_igf_id': 'IGF00123_MISEQ_000000000-TEST_1',
         'lane_number': '1'
     }]
     ra = RunAdaptor(**{'session': base.session})
     ra.store_run_and_attribute_data(data=run_data)
     # PIPELINE
     pipeline_data = [{
         "pipeline_name": "PrimaryAnalysis",
         "pipeline_db": "sqlite:////aln.db",
     }, {
         "pipeline_name": "DemultiplexingFastq",
         "pipeline_db": "sqlite:////fastq.db",
     }]
     pipeline_seed_data = [
         {
             'pipeline_name': 'PrimaryAnalysis',
             'seed_id': 1,
             'seed_table': 'experiment'
         },
         {
             'pipeline_name': 'PrimaryAnalysis',
             'seed_id': 2,
             'seed_table': 'experiment'
         },
         {
             'pipeline_name': 'DemultiplexingFastq',
             'seed_id': 1,
             'seed_table': 'seqrun'
         },
         {
             'pipeline_name': 'DemultiplexingFastq',
             'seed_id': 2,
             'seed_table': 'seqrun'
         },
     ]
     update_data = [{
         'pipeline_name': 'PrimaryAnalysis',
         'seed_id': 2,
         'seed_table': 'experiment',
         'status': 'FINISHED'
     }, {
         'pipeline_name': 'DemultiplexingFastq',
         'seed_id': 2,
         'seed_table': 'seqrun',
         'status': 'FINISHED'
     }]
     pla = PipelineAdaptor(**{'session': base.session})
     pla.store_pipeline_data(data=pipeline_data)
     pla.create_pipeline_seed(data=pipeline_seed_data)
     pla.update_pipeline_seed(update_data)
     base.close_session()