예제 #1
0
    def test_reset_pipeline_seed_for_rerun(self):
        base = BaseAdaptor(**{'session_class': self.session_class})
        base.start_session()
        sra = SeqrunAdaptor(**{'session': base.session})
        seqrun = sra.fetch_seqrun_records_igf_id(
            seqrun_igf_id='171003_M00001_0089_000000000-TEST')
        pp = PipelineAdaptor(**{'session': base.session})
        pipeline = pp.fetch_pipeline_records_pipeline_name(
            'demultiplexing_fastq')
        pipe_seed = pp.fetch_pipeline_seed(pipeline_id=pipeline.pipeline_id,
                                           seed_id=seqrun.seqrun_id,
                                           seed_table='seqrun')
        self.assertEqual(pipe_seed.status, 'SEEDED')
        pp.update_pipeline_seed(data=[{
            'pipeline_id': pipeline.pipeline_id,
            'seed_id': seqrun.seqrun_id,
            'seed_table': 'seqrun',
            'status': 'FINISHED',
        }])
        pipe_seed2 = pp.fetch_pipeline_seed(pipeline_id=pipeline.pipeline_id,
                                            seed_id=seqrun.seqrun_id,
                                            seed_table='seqrun')
        self.assertEqual(pipe_seed2.status, 'FINISHED')
        base.close_session()

        with open(self.seqrun_input_list, 'w') as fp:
            fp.write('171003_M00001_0089_000000000-TEST')

        mps = Modify_pipeline_seed(igf_id_list=self.seqrun_input_list,
                                   table_name='seqrun',
                                   pipeline_name='demultiplexing_fastq',
                                   dbconfig_file=self.dbconfig,
                                   log_slack=False,
                                   log_asana=False,
                                   clean_up=True)
        mps.reset_pipeline_seed_for_rerun(seeded_label='SEEDED')

        base.start_session()
        sra = SeqrunAdaptor(**{'session': base.session})
        seqrun = sra.fetch_seqrun_records_igf_id(
            seqrun_igf_id='171003_M00001_0089_000000000-TEST')
        pp = PipelineAdaptor(**{'session': base.session})
        pipeline = pp.fetch_pipeline_records_pipeline_name(
            'demultiplexing_fastq')
        pipe_seed = pp.fetch_pipeline_seed(pipeline_id=pipeline.pipeline_id,
                                           seed_id=seqrun.seqrun_id,
                                           seed_table='seqrun')
        self.assertEqual(pipe_seed.status, 'SEEDED')
        base.close_session()
  def run(self):
    try:
      igf_session_class = self.param_required('igf_session_class')              # set by base class
      pipeline_name = self.param_required('pipeline_name')
      igf_id = self.param_required('igf_id')
      task_id = self.param_required('task_id')
      seed_id = self.param_required('seed_id')
      seed_table = self.param_required('seed_table')
      new_status = self.param_required('new_status')

      pa = PipelineAdaptor(**{'session_class':igf_session_class})
      pa.start_session()                                                        # connect to db
      pa.update_pipeline_seed(\
        data=[{'pipeline_name':pipeline_name,
               'seed_id':int(seed_id),
               'seed_table':seed_table,
               'status':new_status.upper()}])                                   # update seed record in db
      pa.close_session()                                                        # close db connection
      message = \
        'changing status in {0} for seed {1} as {2}'.\
        format(\
          pipeline_name,
          seed_id,
          new_status.upper())                                                   # format message
      self.post_message_to_slack(message, reaction='pass')                      # send message to slack
      self.comment_asana_task(task_name=task_id, comment=message)               # send message to asana
    except Exception as e:
      message = \
        'seqrun: {2}, Error in {0}: {1}'.\
          format(\
            self.__class__.__name__,
            e,
            igf_id)
      self.warning(message)
      self.post_message_to_slack(message,reaction='fail')                       # post msg to slack for failed jobs
      raise
예제 #3
0
 def test_update_pipeline_seed(self):
     pl = PipelineAdaptor(**{'session_class': self.session_class})
     pl.start_session()
     pipeline_seed_data1 = [
         {
             'pipeline_name': 'demultiplexing_fastq',
             'seed_id': '2',
             'seed_table': 'seqrun',
         },
     ]
     with self.assertRaises(ValueError):
         pl.update_pipeline_seed(data=pipeline_seed_data1)
     pipeline_seed_data2 = [
         {
             'pipeline_name': 'demultiplexing_fastq',
             'seed_id': '2',
             'seed_table': 'seqrun',
             'status': 'RUNNING'
         },
     ]
     pl.update_pipeline_seed(data=pipeline_seed_data2)
     (pipe_seed1, table_data1) = pl.fetch_pipeline_seed_with_table_data(
         pipeline_name='demultiplexing_fastq')
     self.assertEqual(len(table_data1.to_dict(orient='records')),
                      len(pipe_seed1.to_dict(orient='records')))
     pipeline_seed_data3 = [
         {
             'pipeline_name': 'demultiplexing_fastq',
             'seed_id': '1',
             'seed_table': 'seqrun',
             'status': 'RUNNING'
         },
     ]
     pl.update_pipeline_seed(data=pipeline_seed_data3)
     (pipe_seed2, _) = pl.fetch_pipeline_seed_with_table_data(
         pipeline_name='demultiplexing_fastq', status='RUNNING')
     pl.close_session()
     self.assertEqual(
         pipe_seed2.loc[pipe_seed2.seed_id == 1]['status'].values[0],
         'RUNNING')
 def reset_pipeline_seed_for_rerun(self,
                                   seeded_label='SEEDED',
                                   restricted_status_list=('SEEDED',
                                                           'RUNNING')):
     '''
 A method for setting the pipeline for re-run if the first run has failed or aborted
 This method will set the pipeline_seed.status as 'SEEDED' only if its not already
 'SEEDED' or 'RUNNING'
 :param seeded_label: A text label for seeded status, default SEEDED
 :param restricted_status_list: A list of pipeline status to exclude from the search,
                                default ['SEEDED','RUNNING']
 '''
     try:
         db_connected = False
         restricted_status_list = list(restricted_status_list)
         input_id_list = self._read_input_list(
             igf_id_list=self.igf_id_list)  # get input ids from file
         failed_ids = list()  # define empty list of failed ids
         pass_list = list()  # required for logging in asana
         base = self.base_adaptor
         base.start_session()  # connect to database
         db_connected = True
         for igf_id in input_id_list:
             pipe_seed_data = self._fetch_pipeline_seed_entry(
                 igf_id=igf_id, restrict_seed_status=restricted_status_list
             )  # get pipe seed data for igf id
             if pipe_seed_data is None:
                 failed_ids.append(igf_id)  # add igf id to failed list
             else:
                 pl = PipelineAdaptor(**{'session': base.session
                                         })  # connect to pipeline adaptor
                 updated_seed_data = [{
                     'pipeline_id': pipe_seed_data.pipeline_id,
                     'seed_id': pipe_seed_data.seed_id,
                     'seed_table': pipe_seed_data.seed_table,
                     'status': seeded_label
                 }]  # set data for seed update
                 pl.update_pipeline_seed(
                     data=updated_seed_data,
                     autosave=False)  # update data to pipeline seed table
                 pass_list.append(igf_id)
         base.commit_session()  # save data to database after all changes
         base.close_session()  # close database connection
         db_connected = False
         if self.clean_up:
             self._clear_input_list(
                 file_path=self.igf_id_list, igf_list=failed_ids
             )  # over write input list and add failed ids for next try
             message = 'Overwriting pipeseed input list {0}'.format(
                 self.igf_id_list)
             if self.log_slack:
                 self.igf_slack.post_message_to_channel(
                     message, reaction='pass'
                 )  # comment to slack for file over writing
         if len(pass_list) > 0:
             for id_line in pass_list:
                 message='Changed pipeline seed for id {0}, pipeline {1}, to {2}'.\
                         format(id_line,self.pipeline_name,seeded_label)
                 if self.log_slack:
                     self.igf_slack.post_message_to_channel(
                         message,
                         reaction='pass')  # comment to slack channel
                 if self.log_asana:
                     self.igf_asana.comment_asana_task(
                         task_name=id_line,
                         comment=message)  # comment on asana task
     except Exception as e:
         if db_connected:
             base.rollback_session()
             base.close_session()
             message = 'Failed to update pipeline seed, Error: {0}'.format(
                 e)
             warnings.warn(message)
         if self.log_slack:
             self.igf_slack.post_message_to_channel(message,
                                                    reaction='fail')
         raise
예제 #5
0
  def run(self):
    '''
    Run method for the seed job factory class of the all pipelines
    
    :param igf_session_class: A database session class
    :param pipeline_name: Name of the pipeline
    :param seed_id_label: A text label for the seed_id, default seed_id
    :param seqrun_id_label: A text for seqrun_id column name, default seqrun_id
    :param seqrun_date_label: A text label for the seqrun date, default seqrun_date
    :param seqrun_igf_id_label: A text label for sequencing run igf id, default seqrun_igf_id
    :param seeded_label: A text label for the status seeded in pipeline_seed table, default SEEDED
    :param running_label: A text label for the status running in the pipeline_seed table, default RUNNING
    :param seed_status_label: A text label for the pipeline_seed status column name, default status
    :param experiment_id_label: A text label for the experiment_id, default experiment_id
    :param pipeseed_mode: A text label for pipeline mode, default demultiplexing
                          Allowed values are 
                             
                             demultiplexing
                             alignment
                             
    :returns: A list of dictionary containing the seqrun ids or experiment_igf_ids seed for analysis
    '''
    try:
      dbconnected=False
      igf_session_class = self.param_required('igf_session_class')              # set by base class
      pipeline_name = self.param_required('pipeline_name')
      seed_id_label = self.param_required('seed_id_label')
      seqrun_id_label = self.param_required('seqrun_id_label')
      seeded_label=self.param_required('seeded_label')
      running_label=self.param_required('running_label')
      seqrun_date_label=self.param_required('seqrun_date_label')
      seqrun_igf_id_label=self.param_required('seqrun_igf_id_label')
      seed_status_label=self.param_required('seed_status_label')
      experiment_id_label = self.param_required('experiment_id_label')
      pipeseed_mode=self.param_required('pipeseed_mode')

      if pipeseed_mode not in ('demultiplexing','alignment'):
        raise ValueError('Pipeseed_mode {0} not supported'.format(pipeseed_mode))

      pipeseeds_data,seed_data=get_pipeline_seeds(\
                                 pipeseed_mode=pipeseed_mode,
                                 pipeline_name=pipeline_name,
                                 igf_session_class=igf_session_class)           # fetch pipeseed data from db
      if len(seed_data.index)>0:
        seed_data=seed_data.\
                  to_dict(orient='records')                                     # convert dataframe to list of dictionaries
        self.param('sub_tasks',seed_data)                                       # set sub_tasks param for the data flow
        pipeseeds_data[seed_status_label]=pipeseeds_data[seed_status_label].\
                                          map({seeded_label:running_label})     # update seed records in pipeseed table, changed status to RUNNING
        pa = PipelineAdaptor(**{'session_class':igf_session_class})             # get db adaptor
        pa.start_session()                                                      # connect to db
        dbconnected=True
        pa.update_pipeline_seed(data=pipeseeds_data.to_dict(orient='records'),
                                autosave=False)                                 # set pipeline seeds as running
        pa.commit_session()                                                     # save changes to db
        pa.close_session()                                                      # close db connection
        dbconnected=False
        message='Total {0} new job found for {1}, pipeline: {2}'.\
                format(len(seed_data),self.__class__.__name__,pipeline_name)    # format msg for slack
        self.post_message_to_slack(message,reaction='pass')                     # send update to slack
      else:
        message='{0}, {1}: no new job created'.format(self.__class__.__name__,\
                                                      pipeline_name)            # format msg for failed jobs
        self.warning(message)
        self.post_message_to_slack(message,reaction='sleep')                    # post about failed job to slack

    except Exception as e:
      message='Error in {0},{1}: {2}'.format(self.__class__.__name__,\
                                             pipeline_name, e)                  # format slack msg
      self.warning(message)
      self.post_message_to_slack(message,reaction='fail')                       # send msg to slack
      if dbconnected:
        pa.rollback_session()                                                   # remove changes from db
        pa.close_session()
      raise                                                                     # mark worker as failed
 def setUp(self):
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.drop_all(self.engine)
     if os.path.exists(self.dbname):
         os.remove(self.dbname)
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     base.start_session()
     # PLATFORM
     platform_data = [{
         "platform_igf_id": "M03291",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"
     }]
     flowcell_rule_data = [{
         "platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"
     }]
     pl = PlatformAdaptor(**{'session': base.session})
     pl.store_platform_data(data=platform_data)
     pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
     # SEQRUN
     seqrun_data = [{
         'seqrun_igf_id': '180416_M03291_0139_000000000-TEST',
         'flowcell_id': '000000000-TEST',
         'platform_igf_id': 'M03291',
         'flowcell': 'MISEQ',
     }, {
         'seqrun_igf_id': '180416_M03291_0140_000000000-TEST',
         'flowcell_id': '000000000-TEST',
         'platform_igf_id': 'M03291',
         'flowcell': 'MISEQ',
     }]
     sra = SeqrunAdaptor(**{'session': base.session})
     sra.store_seqrun_and_attribute_data(data=seqrun_data)
     # PROJECT
     project_data = [{'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq'}]
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     # SAMPLE
     sample_data = [{
         'sample_igf_id': 'IGF00123',
         'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq'
     }, {
         'sample_igf_id': 'IGF00124',
         'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq'
     }]
     sa = SampleAdaptor(**{'session': base.session})
     sa.store_sample_and_attribute_data(data=sample_data)
     # EXPERIMENT
     experiment_data = [{
         'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq',
         'sample_igf_id': 'IGF00123',
         'experiment_igf_id': 'IGF00123_MISEQ',
         'library_name': 'IGF00123',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'POLYA-RNA',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ',
         'singlecell_chemistry': 'TENX'
     }, {
         'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq',
         'sample_igf_id': 'IGF00124',
         'experiment_igf_id': 'IGF00124_MISEQ',
         'library_name': 'IGF00124',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'POLYA-RNA',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ',
         'singlecell_chemistry': 'TENX'
     }]
     ea = ExperimentAdaptor(**{'session': base.session})
     ea.store_project_and_attribute_data(data=experiment_data)
     # RUN
     run_data = [{
         'experiment_igf_id': 'IGF00123_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-TEST',
         'run_igf_id': 'IGF00123_MISEQ_000000000-TEST_1',
         'lane_number': '1'
     }]
     ra = RunAdaptor(**{'session': base.session})
     ra.store_run_and_attribute_data(data=run_data)
     # PIPELINE
     pipeline_data = [{
         "pipeline_name": "PrimaryAnalysis",
         "pipeline_db": "sqlite:////aln.db",
     }, {
         "pipeline_name": "DemultiplexingFastq",
         "pipeline_db": "sqlite:////fastq.db",
     }]
     pipeline_seed_data = [
         {
             'pipeline_name': 'PrimaryAnalysis',
             'seed_id': 1,
             'seed_table': 'experiment'
         },
         {
             'pipeline_name': 'PrimaryAnalysis',
             'seed_id': 2,
             'seed_table': 'experiment'
         },
         {
             'pipeline_name': 'DemultiplexingFastq',
             'seed_id': 1,
             'seed_table': 'seqrun'
         },
         {
             'pipeline_name': 'DemultiplexingFastq',
             'seed_id': 2,
             'seed_table': 'seqrun'
         },
     ]
     update_data = [{
         'pipeline_name': 'PrimaryAnalysis',
         'seed_id': 2,
         'seed_table': 'experiment',
         'status': 'FINISHED'
     }, {
         'pipeline_name': 'DemultiplexingFastq',
         'seed_id': 2,
         'seed_table': 'seqrun',
         'status': 'FINISHED'
     }]
     pla = PipelineAdaptor(**{'session': base.session})
     pla.store_pipeline_data(data=pipeline_data)
     pla.create_pipeline_seed(data=pipeline_seed_data)
     pla.update_pipeline_seed(update_data)
     base.close_session()