def test_process_project_data_and_account(self):
     fa=Find_and_register_new_project_data(projet_info_path=os.path.join('.','data/check_project_data'),\
                                         dbconfig=self.dbconfig,\
                                         user_account_template='template/email_notification/send_new_account_info.txt',\
                                         log_slack=False,\
                                         setup_irods=False,\
                                         notify_user=False,\
                                         check_hpc_user=False,\
                                         )
     fa.process_project_data_and_account()
     dbparam = None
     with open(self.dbconfig, 'r') as json_data:
       dbparam = json.load(json_data)
     base = BaseAdaptor(**dbparam)
     base.start_session()
     pa=ProjectAdaptor(**{'session':base.session})
     project_exists=pa.check_project_records_igf_id(project_igf_id='IGFP0002_test_23-5-2017_rna')
     self.assertTrue(project_exists)
     ua=UserAdaptor(**{'session':base.session})
     user_exists=ua.check_user_records_email_id(email_id='*****@*****.**')
     self.assertTrue(user_exists)
     user1=ua.fetch_user_records_email_id(user_email_id='*****@*****.**')
     self.assertEqual(user1.name,'User2')
     sa=SampleAdaptor(**{'session':base.session})
     sample_exists=sa.check_sample_records_igf_id(sample_igf_id='IGF00006')
     self.assertTrue(sample_exists)
     project_user_exists=pa.check_existing_project_user(project_igf_id='IGFP0002_test_23-5-2017_rna',\
                                                        email_id='*****@*****.**')
     self.assertTrue(project_user_exists)
     project_user_exists=pa.check_existing_project_user(project_igf_id='IGFP0002_test_23-5-2017_rna',\
                                                        email_id='*****@*****.**')
     self.assertTrue(project_user_exists)
     base.close_session()
コード例 #2
0
 def test_check_data_authority_for_project(self):
     pa = ProjectAdaptor(**{'session_class': self.session_class})
     pa.start_session()
     pa_results1 = pa.check_data_authority_for_project(
         project_igf_id='IGFP0001_test_22-8-2017_rna')
     self.assertTrue(pa_results1)
     pa_results2 = pa.check_data_authority_for_project(
         project_igf_id='IGFP0002_test_22-8-2017_rna')
     self.assertFalse(pa_results2)
     pa.close_session()
コード例 #3
0
 def test_fetch_data_authority_for_project(self):
     pa = ProjectAdaptor(**{'session_class': self.session_class})
     pa.start_session()
     pa_results1 = pa.fetch_data_authority_for_project(
         project_igf_id='IGFP0001_test_22-8-2017_rna')
     self.assertEqual(pa_results1.email_id, '*****@*****.**')
     pa_results2 = pa.fetch_data_authority_for_project(
         project_igf_id='IGFP0002_test_22-8-2017_rna')
     self.assertEqual(pa_results2, None)
     pa.close_session()
コード例 #4
0
 def test_fetch_all_project_igf_ids(self):
     pa = ProjectAdaptor(**{'session_class': self.session_class})
     pa.start_session()
     project_list = pa.fetch_all_project_igf_ids()
     pa.close_session()
     self.assertTrue('IGFP0002_test_22-8-2017_rna' in
                     project_list['project_igf_id'].values)
     self.assertTrue('IGFP0001_test_22-8-2017_rna' in
                     project_list['project_igf_id'].values)
     self.assertEqual(len(project_list['project_igf_id'].values), 2)
  def run(self):
    '''
    A ehive runnable method for uploading analysis files to irods server
    
    :param file_list: A list of file paths to upload to irods
    :param irods_exe_dir: Irods executable directory
    :param project_igf_id: Name of the project
    :param analysis_name: A string for analysis name, default is 'default'
    :param dir_path_list: A list of directory structure for irod server, default None for using datestamp
    :param file_tag: A text string for adding tag to collection, default None for only project_name
    '''
    try:
      project_igf_id = self.param_required('project_igf_id')
      igf_session_class = self.param_required('igf_session_class')
      irods_exe_dir = self.param_required('irods_exe_dir')
      file_list = self.param_required('file_list')
      analysis_name = self.param_required('analysis_name')
      dir_path_list = self.param_required('dir_path_list')
      file_tag = self.param_required('file_tag')

      pa = ProjectAdaptor(**{'session_class':igf_session_class})
      pa.start_session()
      user = \
        pa.fetch_data_authority_for_project(
          project_igf_id=project_igf_id)                                        # fetch user info from db
      pa.close_session()

      if user is None:
        raise ValueError('No user found for project {0}'.format(project_igf_id))

      username = user.username                                                  # get username for irods
      irods_upload = IGF_irods_uploader(irods_exe_dir)                          # create instance for irods upload
      for file_path in file_list:
        if not os.path.exists(file_path):
          raise IOError('Failed to find file {0} for irods upload'.\
                        format(file_path))

      irods_upload.\
        upload_analysis_results_and_create_collection(
          file_list=file_list,
          irods_user=username,
          project_name=project_igf_id,
          analysis_name=analysis_name,
          dir_path_list=dir_path_list,
          file_tag=file_tag)                                                    # upload analysis results to irods and build collection
    except Exception as e:
      message = \
        'project: {2}, Error in {0}: {1}'.format(
          self.__class__.__name__,
          e,
          project_igf_id)
      self.warning(message)
      self.post_message_to_slack(message,reaction='fail')                       # post msg to slack for failed jobs
      raise
コード例 #6
0
    def test_mark_project_barcode_check_off(self):
        pr = ProjectAdaptor(**{'session_class': self.session_class})
        pr.start_session()
        pr.store_project_and_attribute_data(self.data)
        pr.close_session()

        mark_project_barcode_check_off(
            project_igf_id='IGFP001_test1_24-1-18',
            session_class=self.session_class)  # no attribute record
        pr.start_session()
        attribute_check = pr.check_project_attributes(
            project_igf_id='IGFP001_test1_24-1-18',
            attribute_name='barcode_check')
        self.assertTrue(attribute_check)
        pr_attributes = pr.get_project_attributes(
            project_igf_id='IGFP001_test1_24-1-18',
            attribute_name='barcode_check')
        for pr_attribute in pr_attributes.to_dict(orient='records'):
            self.assertEqual(pr_attribute['attribute_value'], 'OFF')

        pr_attributes = pr.get_project_attributes(
            project_igf_id='IGFP002_test1_24-1-18',
            attribute_name='barcode_check')
        for pr_attribute in pr_attributes.to_dict(orient='records'):
            self.assertEqual(pr_attribute['attribute_value'], 'ON')
        pr.close_session()

        mark_project_barcode_check_off(
            project_igf_id='IGFP002_test1_24-1-18',
            session_class=self.session_class)  # barcode check ON
        pr.start_session()
        pr_attributes = pr.get_project_attributes(
            project_igf_id='IGFP002_test1_24-1-18',
            attribute_name='barcode_check')
        for pr_attribute in pr_attributes.to_dict(orient='records'):
            self.assertEqual(pr_attribute['attribute_value'], 'OFF')

        pr_attributes = pr.get_project_attributes(
            project_igf_id='IGFP003_test1_24-1-18',
            attribute_name='barcode_check')
        for pr_attribute in pr_attributes.to_dict(orient='records'):
            self.assertEqual(pr_attribute['attribute_value'], 'OFF')
        pr.close_session()

        mark_project_barcode_check_off(
            project_igf_id='IGFP003_test1_24-1-18',
            session_class=self.session_class)  # barcode check OFF
        pr.start_session()
        pr_attributes = pr.get_project_attributes(
            project_igf_id='IGFP003_test1_24-1-18',
            attribute_name='barcode_check')
        for pr_attribute in pr_attributes.to_dict(orient='records'):
            self.assertEqual(pr_attribute['attribute_value'], 'OFF')
        pr.close_session()
 def setUp(self):
   self.dbconfig='data/dbconfig.json'
   self.new_project_data='data/check_project_data/new_project_data.csv'
   dbparam = None
   with open(self.dbconfig, 'r') as json_data:
     dbparam = json.load(json_data)
   base = BaseAdaptor(**dbparam)
   self.engine = base.engine
   self.dbname=dbparam['dbname']
   Base.metadata.create_all(self.engine)
   self.session_class=base.session_class
   base.start_session()
   ua=UserAdaptor(**{'session':base.session})
   user_data=[{'name':'user1','email_id':'*****@*****.**','username':'******'},
              {'name':'igf','email_id':'*****@*****.**','username':'******'}]
   ua.store_user_data(data=user_data)
   project_data=[{'project_igf_id':'IGFP0001_test_22-8-2017_rna',
                  'project_name':'test_22-8-2017_rna',
                  'description':'Its project 1',
                  'project_deadline':'Before August 2017',
                  'comments':'Some samples are treated with drug X',
                }]
   pa=ProjectAdaptor(**{'session':base.session})
   pa.store_project_and_attribute_data(data=project_data)
   project_user_data=[{'project_igf_id':'IGFP0001_test_22-8-2017_rna',
                       'email_id':'*****@*****.**',
                       'data_authority':True}]
   pa.assign_user_to_project(data=project_user_data)
   sample_data=[{'sample_igf_id':'IGF00001',
                 'project_igf_id':'IGFP0001_test_22-8-2017_rna',},
                {'sample_igf_id':'IGF00002',
                 'project_igf_id':'IGFP0001_test_22-8-2017_rna',},
                {'sample_igf_id':'IGF00003',
                 'project_igf_id':'IGFP0001_test_22-8-2017_rna',},
                {'sample_igf_id':'IGF00004',
                 'project_igf_id':'IGFP0001_test_22-8-2017_rna',},
                {'sample_igf_id':'IGF00005', 
                 'project_igf_id':'IGFP0001_test_22-8-2017_rna',},
               ]
   sa=SampleAdaptor(**{'session':base.session})
   sa.store_sample_and_attribute_data(data=sample_data)
   base.close_session()
   new_project_data=[{'project_igf_id':'IGFP0002_test_23-5-2017_rna',
                      'name':'user2',
                      'email_id':'*****@*****.**',
                      'sample_igf_id':'IGF00006',
                     },
                     {'project_igf_id':'IGFP0003_test_24-8-2017_rna',
                      'name':'user2',
                      'email_id':'*****@*****.**',
                      'sample_igf_id':'IGF00007',
                      'barcode_check':'OFF'
                     }]
   pd.DataFrame(new_project_data).to_csv(os.path.join('.',self.new_project_data))
コード例 #8
0
 def test_count_project_samples(self):
     pa = ProjectAdaptor(**{'session_class': self.session_class})
     pa.start_session()
     sample1 = pa.count_project_samples(
         project_igf_id='IGFP0001_test_22-8-2017_rna')
     self.assertEqual(sample1, 3)
     sample2 = pa.count_project_samples(
         project_igf_id='IGFP0002_test_22-8-2017_rna')
     self.assertEqual(sample2, 0)
     sample3 = pa.count_project_samples(
         project_igf_id='IGFP0001_test_22-8-2017_rna', only_active=False)
     self.assertEqual(sample3, 4)
     pa.close_session()
コード例 #9
0
  def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam=read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname=dbparam['dbname']
    Base.metadata.drop_all(self.engine)
    if os.path.exists(self.dbname):
      os.remove(self.dbname)
    Base.metadata.create_all(self.engine)
    self.session_class=base.get_session_class()

    base = BaseAdaptor(**{'session_class':self.session_class})
    base.start_session()
    platform_data=[{ "platform_igf_id" : "M001",
                     "model_name" : "MISEQ" ,
                     "vendor_name" : "ILLUMINA" ,
                     "software_name" : "RTA",
                     "software_version" : "RTA1.18.54"}]                        # platform data
    flowcell_rule_data=[{"platform_igf_id":"M001",
                         "flowcell_type":"MISEQ",
                         "index_1":"NO_CHANGE",
                         "index_2":"NO_CHANGE"}]                                # flowcell rule data
    pl=PlatformAdaptor(**{'session':base.session})
    pl.store_platform_data(data=platform_data)                                  # loading platform data
    pl.store_flowcell_barcode_rule(data=flowcell_rule_data)                     # loading flowcell rules data
    project_data=[{'project_igf_id':'ProjectA'}]                                # project data
    pa=ProjectAdaptor(**{'session':base.session})
    pa.store_project_and_attribute_data(data=project_data)                      # load project data
    sample_data=[{'sample_igf_id':'SampleA',
                  'project_igf_id':'ProjectA'}]                                 # sample data
    sa=SampleAdaptor(**{'session':base.session})
    sa.store_sample_and_attribute_data(data=sample_data)                        # store sample data
    seqrun_data=[{'seqrun_igf_id':'SeqrunA', 
                  'flowcell_id':'000000000-D0YLK', 
                  'platform_igf_id':'M001',
                  'flowcell':'MISEQ'}]                                          # seqrun data
    sra=SeqrunAdaptor(**{'session':base.session})
    sra.store_seqrun_and_attribute_data(data=seqrun_data)                       # load seqrun data
    experiment_data=[{'experiment_igf_id':'ExperimentA',
                      'sample_igf_id':'SampleA',
                      'library_name':'SampleA',
                      'platform_name':'MISEQ',
                      'project_igf_id':'ProjectA'}]                             # experiment data
    ea=ExperimentAdaptor(**{'session':base.session})
    ea.store_project_and_attribute_data(data=experiment_data)                   # load experiment data
    base.commit_session()
    base.close_session()
コード例 #10
0
def mark_project_barcode_check_off(project_igf_id,
                                   session_class,
                                   barcode_check_attribute='barcode_check',
                                   barcode_check_val='OFF'):
    '''
  A utility method for marking project barcode check as off using the project_igf_id
  
  :param project_igf_id: A project_igf_id string
  :param session_class: A db session class object
  :param barcode_check_attribute: A text keyword for barcode check attribute, default barcode_check
  :param barcode_check_val: A text for barcode check attribute value, default is 'OFF'
  '''
    try:
        db_connected = False
        pr = ProjectAdaptor(**{'session_class': session_class})
        pr.start_session()
        db_connected = True
        pr_attributes = pr.check_project_attributes(
            project_igf_id=project_igf_id,
            attribute_name=barcode_check_attribute
        )  # check for the existing project attribute
        if pr_attributes:  # if attribute present, then modify it
            project = pr.fetch_project_records_igf_id(
                project_igf_id=project_igf_id)  # fetch project info
            query=pr.session.\
                  query(Project_attribute).\
                  filter(Project_attribute.attribute_name==barcode_check_attribute).\
                  filter(Project_attribute.project_id==project.project_id).\
                  update({Project_attribute.attribute_value:barcode_check_val,},
                         synchronize_session=False)                                   # create query for fetching attribute records and modify attribute records
        else:  # if project attribute is not present, store it
            data = [{
                'project_igf_id': project_igf_id,
                'attribute_name': barcode_check_attribute,
                'attribute_value': barcode_check_val
            }]  # create data structure for the attribute table
            pr.store_project_attributes(
                data, autosave=False
            )  # store data to attribute table without auto commit

        pr.commit_session()
    except:
        if db_connected:
            pr.rollback_session()
        raise
    finally:
        if db_connected:
            pr.close_session()
コード例 #11
0
def get_project_read_count(project_igf_id,
                           session_class,
                           run_attribute_name='R1_READ_COUNT',
                           active_status='ACTIVE'):
    '''
  A utility method for fetching sample read counts for an input project_igf_id
  
  :param project_igf_id: A project_igf_id string
  :param session_class: A db session class object
  :param run_attribute_name: Attribute name from Run_attribute table for read count lookup
  :param active_status: text label for active runs, default ACTIVE
  :returns: A pandas dataframe containing following columns
  
               project_igf_id
               sample_igf_id
               flowcell_id
               attribute_value
  '''
    try:
        read_count = pd.DataFrame()
        pr = ProjectAdaptor(**{'session_class': session_class})
        pr.start_session()
        query=pr.session.query(Project.project_igf_id,
                               Sample.sample_igf_id,
                               Seqrun.flowcell_id,
                               Run_attribute.attribute_value).\
                         join(Sample,Project.project_id==Sample.project_id).\
                         join(Experiment,Sample.sample_id==Experiment.sample_id).\
                         join(Run,Experiment.experiment_id==Run.experiment_id).\
                         join(Seqrun,Seqrun.seqrun_id==Run.seqrun_id).\
                         join(Run_attribute,Run.run_id==Run_attribute.run_id).\
                         filter(Project.project_igf_id==project_igf_id).\
                         filter(Sample.project_id==Project.project_id).\
                         filter(Experiment.sample_id==Sample.sample_id).\
                         filter(Run.experiment_id==Experiment.experiment_id).\
                         filter(Seqrun.seqrun_id==Run.seqrun_id).\
                         filter(Run_attribute.run_id==Run.run_id).\
                         filter(Run_attribute.attribute_name==run_attribute_name).\
                         filter(Run.status==active_status).\
                         filter(Experiment.status==active_status).\
                         filter(Sample.status==active_status)
        results = pr.fetch_records(query=query)
        pr.close_session()
        if len(results.index) > 0:
            read_count = results
        return read_count
    except:
        raise
コード例 #12
0
 def setUp(self):
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     project_data = [{
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X',
     }, {
         'project_igf_id': 'IGFP0002_test_22-8-2017_rna',
         'project_name': 'test_23-8-2017_rna',
         'description': 'Its project 2',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X',
     }]
     base.start_session()
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     sa = SampleAdaptor(**{'session': base.session})
     sample_data = [
         {
             'sample_igf_id': 'IGFS001',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         },
         {
             'sample_igf_id': 'IGFS002',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         },
         {
             'sample_igf_id': 'IGFS003',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         },
         {
             'sample_igf_id': 'IGFS004',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
             'status': 'FAILED',
         },
     ]
     sa.store_sample_and_attribute_data(data=sample_data)
     base.close_session()
コード例 #13
0
 def test_fetch_project_samples(self):
     pa = ProjectAdaptor(**{'session_class': self.session_class})
     pa.start_session()
     sample1 = pa.fetch_project_samples(
         project_igf_id='IGFP0001_test_22-8-2017_rna',
         output_mode='dataframe')
     self.assertEqual(len(sample1.index), 3)
     sample2 = pa.fetch_project_samples(
         project_igf_id='IGFP0002_test_22-8-2017_rna',
         output_mode='dataframe')
     self.assertEqual(len(sample2.index), 0)
     sample3 = pa.fetch_project_samples(
         project_igf_id='IGFP0001_test_22-8-2017_rna',
         only_active=False,
         output_mode='dataframe')
     self.assertEqual(len(sample3.index), 4)
     pa.close_session()
コード例 #14
0
 def setUp(self):
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     project_data = [{
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X',
     }, {
         'project_igf_id': 'IGFP0002_test_22-8-2017_rna',
         'project_name': 'test_23-8-2017_rna',
         'description': 'Its project 2',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X'
     }]
     user_data = [{
         'name': 'UserA',
         'email_id': '*****@*****.**',
         'username': '******'
     }]
     project_user_data = [{
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'email_id': '*****@*****.**',
         'data_authority': True
     }, {
         'project_igf_id': 'IGFP0002_test_22-8-2017_rna',
         'email_id': '*****@*****.**'
     }]
     base.start_session()
     ua = UserAdaptor(**{'session': base.session})
     ua.store_user_data(data=user_data)
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     pa.assign_user_to_project(data=project_user_data)
     base.close_session()
コード例 #15
0
 def setUp(self):
   self.dbconfig = 'data/dbconfig.json'
   dbparam=read_dbconf_json(self.dbconfig)
   base = BaseAdaptor(**dbparam)
   self.engine = base.engine
   self.dbname=dbparam['dbname']
   Base.metadata.drop_all(self.engine)
   if os.path.exists(self.dbname):
     os.remove(self.dbname)
   Base.metadata.create_all(self.engine)
   self.session_class=base.get_session_class()
   base.start_session()
   project_data=[{'project_igf_id':'ProjectA'}]
   pa=ProjectAdaptor(**{'session':base.session})
   pa.store_project_and_attribute_data(data=project_data)                      # load project data
   sample_data=[{'sample_igf_id':'SampleA',
                 'project_igf_id':'ProjectA'}]                                 # sample data
   sa=SampleAdaptor(**{'session':base.session})
   sa.store_sample_and_attribute_data(data=sample_data)                        # store sample data
   experiment_data=[{'experiment_igf_id':'ExperimentA',
                     'sample_igf_id':'SampleA',
                     'library_name':'SampleA',
                     'platform_name':'MISEQ',
                     'project_igf_id':'ProjectA'}]                             # experiment data
   ea=ExperimentAdaptor(**{'session':base.session})
   ea.store_project_and_attribute_data(data=experiment_data)
   self.temp_dir=get_temp_dir()
   temp_files=['a.csv','b.csv']
   for temp_file in temp_files:
     with open(os.path.join(self.temp_dir,temp_file),'w') as fp:
       fp.write('A')
   collection_data=[{'name':'ExperimentA',
                     'type':'AnalysisA_html',
                     'table':'experiment',
                     'file_path':os.path.join(self.temp_dir,temp_file)}
                     for temp_file in temp_files]
   ca=CollectionAdaptor(**{'session':base.session})
   ca.load_file_and_create_collection(data=collection_data,
                                      calculate_file_size_and_md5=False)
   base.close_session()
コード例 #16
0
def get_seqrun_info_for_project(project_igf_id, session_class):
    '''
  A utility method for fetching seqrun_igf_id and flowcell_id which are linked
  to a specific project_igf_id
  
  required params:
  project_igf_id: A project_igf_id string
  session_class: A db session class object
  
  returns a pandas dataframe containing following columns
    seqrun_igf_id
    flowcell_id
  '''
    try:
        seqrun_info = pd.DataFrame()
        pr = ProjectAdaptor(**{'session_class': session_class})
        pr.start_session()
        query=pr.session.query(distinct(Seqrun.seqrun_igf_id).\
                               label('seqrun_igf_id'),
                               Seqrun.flowcell_id).\
                         join(Run,Seqrun.seqrun_id==Run.seqrun_id).\
                         join(Experiment,Experiment.experiment_id==Run.experiment_id).\
                         join(Sample,Sample.sample_id==Experiment.sample_id).\
                         join(Project,Project.project_id==Sample.project_id).\
                         filter(Project.project_id==Sample.project_id).\
                         filter(Sample.sample_id==Experiment.sample_id).\
                         filter(Experiment.experiment_id==Run.experiment_id).\
                         filter(Run.seqrun_id==Seqrun.seqrun_id).\
                         filter(Project.project_igf_id==project_igf_id)
        results = pr.fetch_records(query=query)
        pr.close_session()
        if len(results.index) > 0:
            seqrun_info = results
        return seqrun_info
    except:
        raise
コード例 #17
0
  def run(self):
    try:
      fastq_dir = self.param_required('fastq_dir')
      seqrun_igf_id = self.param_required('seqrun_igf_id')
      project_name = self.param_required('project_name')
      igf_session_class = self.param_required('igf_session_class')
      irods_exe_dir = self.param_required('irods_exe_dir')
      flowcell_id = self.param_required('flowcell_id')
      samplesheet_filename = self.param('samplesheet_filename')
      manifest_name = self.param_required('manifest_name')
      report_html = self.param('report_html')
      use_ephemeral_space = self.param('use_ephemeral_space')

      pa = ProjectAdaptor(**{'session_class':igf_session_class})
      pa.start_session()
      user = \
        pa.fetch_data_authority_for_project(\
          project_igf_id=project_name)                                          # fetch user info from db
      pa.close_session()

      if user is None:
        raise ValueError('No user found for project {0}'.\
                         format(project_name))

      username = user.username                                                  # get username for irods

      report_htmlname = os.path.basename(report_html)
      seqrun_date = seqrun_igf_id.split('_')[0]                                 # collect seqrun date from igf id
      seqrun_date = datetime.datetime.strptime(seqrun_date,'%y%m%d').date()     # identify actual date
      seqrun_date = str(seqrun_date)                                            # convert object to string
      irods_upload = IGF_irods_uploader(irods_exe_dir)                          # create instance for irods upload
      base_seq_dir = os.path.basename(fastq_dir)                                # get base name for the source dir
      tarfile_name = \
        '{0}_{1}_{2}.tar'.\
          format(\
            project_name,
            base_seq_dir,
            seqrun_date)                                                        # construct name of the tarfile
      temp_work_dir = \
        get_temp_dir(use_ephemeral_space=use_ephemeral_space)                   # get a temp dir
      tarfile_name = \
        os.path.join(
          temp_work_dir,
          tarfile_name)                                                         # create tarfile in the temp dir

      with tarfile.open(tarfile_name, "w") as tar:
        for root,_, files in os.walk(top=fastq_dir):
          if samplesheet_filename in files:
            samplesheet_file = \
              os.path.join(os.path.abspath(root),
                           samplesheet_filename)                                # get samplesheet filepath
            tmp_samplesheet_file = \
              os.path.join(
                temp_work_dir,
                '{0}_{1}_{2}_{3}'.\
                  format(
                    project_name,
                    base_seq_dir,
                    seqrun_date,
                    samplesheet_filename))
            copy2(
              samplesheet_file,
              tmp_samplesheet_file)                                             # change samplesheet filename
            tar.add(
              tmp_samplesheet_file,
              arcname=\
                os.path.relpath(
                  tmp_samplesheet_file,
                  start=temp_work_dir))                                         # add samplesheet file to tar

          if report_htmlname in files:
            for file in files:
              if fnmatch.fnmatch(os.path.join(root,file),report_html):
                report_file = os.path.join(os.path.abspath(root),file)          # get filepath for the report
                tmp_report_file = \
                  os.path.join(\
                    temp_work_dir,
                    '{0}_{1}_{2}_{3}'.\
                    format(\
                      project_name,
                      base_seq_dir,
                      seqrun_date,
                      os.path.basename(report_file)))                           # change report name
                copy2(report_file, tmp_report_file)                             # copy report file to temp
                tar.add(tmp_report_file,
                        arcname=os.path.relpath(tmp_report_file,
                                                start=temp_work_dir))           # add demultiplexing report to tar

          if manifest_name in files:
            manifest_file = \
              os.path.join(os.path.abspath(root),
                           manifest_name)                                       # get samplesheet filepath
            tmp_manifest_file = \
              os.path.join(\
                temp_work_dir,
                '{0}_{1}_{2}_{3}'.\
                format(\
                  project_name,
                  base_seq_dir,
                  seqrun_date,
                  manifest_name))                                               # change manifest name
            copy2(manifest_file,tmp_manifest_file)                              # copy manifest to temp
            tar.add(tmp_manifest_file,
                    arcname=os.path.relpath(tmp_manifest_file,
                                            start=temp_work_dir))               # add samplesheet file to tar

          for file in files:
            if fnmatch.fnmatch(file, '*.fastq.gz') and \
              not fnmatch.fnmatch(file, 'Undetermined_*'):
              fastq_file_path = os.path.join(os.path.abspath(root),file)        # get filepath for the fastq files
              tar.add(fastq_file_path,
                      arcname=os.path.relpath(fastq_file_path,
                                              start=fastq_dir))                 # add fastq file to tar

      irods_upload.\
      upload_fastqfile_and_create_collection(\
        filepath=tarfile_name,
        irods_user=username,
        project_name=project_name,
        run_igf_id=seqrun_igf_id,
        flowcell_id=flowcell_id,
        run_date=seqrun_date)                                                   # upload fastq data to irods
      remove_dir(temp_work_dir)                                                 # remove temp dir once data uoload is done
    except Exception as e:
      message = \
        'seqrun: {2}, Error in {0}: {1}'.\
        format(\
          self.__class__.__name__,
          e,
          seqrun_igf_id)
      self.warning(message)
      self.post_message_to_slack(message,reaction='fail')                       # post msg to slack for failed jobs
      raise
コード例 #18
0
        'run_igf_id': 'RunB',
        'experiment_igf_id': 'ExperimentA',
        'seqrun_igf_id': '180610_K00345_0063_AHWL7CBBXX',
        'lane_number': '1'
    }, {
        'run_igf_id': 'RunC',
        'experiment_igf_id': 'ExperimentA',
        'seqrun_igf_id': '180410_K00345_0063_AHWL7CBBXX',
        'lane_number': '1'
    }]  # run data
    base.start_session()
    pl = PlatformAdaptor(**{'session': base.session})
    pl.store_platform_data(data=platform_data)  # loading platform data
    pl.store_flowcell_barcode_rule(
        data=flowcell_rule_data)  # loading flowcell rules data
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)  # load project data
    sa = SampleAdaptor(**{'session': base.session})
    sa.store_sample_and_attribute_data(data=sample_data)  # store sample data
    sra = SeqrunAdaptor(**{'session': base.session})
    sra.store_seqrun_and_attribute_data(data=seqrun_data)  # load seqrun data
    ea = ExperimentAdaptor(**{'session': base.session})
    ea.store_project_and_attribute_data(
        data=experiment_data)  # load experiment data
    ra = RunAdaptor(**{'session': base.session})
    ra.store_run_and_attribute_data(data=run_data)  # load run data
    pipeline_data = [{
        "pipeline_name": "DemultiplexIlluminaFastq",
        "pipeline_db": "sqlite:////bcl2fastq.db",
    }]
コード例 #19
0
    def setUp(self):
        self.dbconfig = 'data/dbconfig.json'
        dbparam = read_dbconf_json(self.dbconfig)
        base = BaseAdaptor(**dbparam)
        self.engine = base.engine
        self.dbname = dbparam['dbname']
        Base.metadata.create_all(self.engine)
        self.session_class = base.get_session_class()
        # load platform data
        platform_data=\
          [{"platform_igf_id" : "M03291" ,
            "model_name" : "MISEQ" ,
            "vendor_name" : "ILLUMINA" ,
            "software_name" : "RTA" ,
            "software_version" : "RTA1.18.54"
           },
           {"platform_igf_id" : "NB501820",
            "model_name" : "NEXTSEQ",
            "vendor_name" : "ILLUMINA",
            "software_name" : "RTA",
            "software_version" : "RTA2"
           },
           {"platform_igf_id" : "K00345",
            "model_name" : "HISEQ4000",
            "vendor_name" : "ILLUMINA",
            "software_name" : "RTA",
            "software_version" : "RTA2"
           }]

        flowcell_rule_data=\
          [{"platform_igf_id":"K00345",
            "flowcell_type":"HiSeq 3000/4000 SR",
            "index_1":"NO_CHANGE",
            "index_2":"NO_CHANGE"},
           {"platform_igf_id":"K00345",
            "flowcell_type":"HiSeq 3000/4000 PE",
            "index_1":"NO_CHANGE",
            "index_2":"REVCOMP"},
           {"platform_igf_id":"NB501820",
            "flowcell_type":"NEXTSEQ",
            "index_1":"NO_CHANGE",
            "index_2":"REVCOMP"},
           {"platform_igf_id":"M03291",
            "flowcell_type":"MISEQ",
            "index_1":"NO_CHANGE",
            "index_2":"NO_CHANGE"}]

        pl = PlatformAdaptor(**{'session_class': base.session_class})
        pl.start_session()
        pl.store_platform_data(data=platform_data)
        pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
        pl.close_session()

        # load project data

        project_data = [{'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA'}]
        pa = ProjectAdaptor(**{'session_class': base.session_class})
        pa.start_session()
        pa.store_project_and_attribute_data(data=project_data)
        pa.close_session()

        # load samples

        sample_data = [
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109792',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109793',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109794',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109795',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109796',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109797',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109797_1',
                'expected_read': 40000000
            },
        ]

        sa = SampleAdaptor(**{'session_class': base.session_class})
        sa.start_session()
        sa.store_sample_and_attribute_data(data=sample_data)
        sa.close_session()

        # load seqrun data

        seqrun_data = [{
            'flowcell_id': 'HV2GJBBXX',
            'platform_igf_id': 'K00345',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX'
        }]

        sra = SeqrunAdaptor(**{'session_class': base.session_class})
        sra.start_session()
        sra.store_seqrun_and_attribute_data(data=seqrun_data)
        sra.close_session()

        # load experiment data

        experiment_data=\
          [{'experiment_igf_id': 'IGF109792_HISEQ4000',
            'library_name': 'IGF109792',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109792',
           },
           {'experiment_igf_id': 'IGF109793_HISEQ4000',
            'library_name': 'IGF109793',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109793',
           },
           {'experiment_igf_id': 'IGF109794_HISEQ4000',
            'library_name': 'IGF109794',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109794',
           },
           {'experiment_igf_id': 'IGF109795_HISEQ4000',
            'library_name': 'IGF109795',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109795',
           },
           {'experiment_igf_id': 'IGF109796_HISEQ4000',
            'library_name': 'IGF109796',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109796',
           },
           {'experiment_igf_id': 'IGF109797_HISEQ4000',
            'library_name': 'IGF109797',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109797',
           },
          ]

        ea = ExperimentAdaptor(**{'session_class': base.session_class})
        ea.start_session()
        ea.store_project_and_attribute_data(data=experiment_data)
        ea.close_session()

        # load run data

        run_data=\
          [{'experiment_igf_id': 'IGF109792_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109792_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':288046541
           },
           {'experiment_igf_id': 'IGF109793_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109793_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':14666330
           },
           {'experiment_igf_id': 'IGF109794_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109794_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':5009143
           },
           {'experiment_igf_id': 'IGF109795_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109795_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':1391747
           },
           {'experiment_igf_id': 'IGF109796_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': '	IGF109796_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':1318008
           },
           {'experiment_igf_id': 'IGF109797_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109797_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':1216324
           },
          ]

        ra = RunAdaptor(**{'session_class': base.session_class})
        ra.start_session()
        ra.store_run_and_attribute_data(data=run_data)
        ra.close_session()
 def setUp(self):
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     base.start_session()
     self.session_class = base.get_session_class()
     project_data = [{
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X',
     }]
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     sample_data = [
         {
             'sample_igf_id': 'IGF00001',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
             'library_strategy': 'RNA-SEQ',
             'experiment_type': 'POLYA-RNA'
         },
         {
             'sample_igf_id': 'IGF00003',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
             'experiment_type': 'POLYA-RNA'
         },
         {
             'sample_igf_id': 'IGF00002',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         },
     ]
     sa = SampleAdaptor(**{'session': base.session})
     sa.store_sample_and_attribute_data(data=sample_data)
     experiment_data = [
         {
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'sample_igf_id': 'IGF00001',
             'experiment_igf_id': 'IGF00001_HISEQ4000',
             'library_name': 'IGF00001'
         },
         {
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'sample_igf_id': 'IGF00003',
             'experiment_igf_id': 'IGF00003_HISEQ4000',
             'library_name': 'IGF00001'
         },
         {
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'sample_igf_id': 'IGF00002',
             'experiment_igf_id': 'IGF00002_HISEQ4000',
             'library_name': 'IGF00002'
         },
     ]
     ea = ExperimentAdaptor(**{'session': base.session})
     ea.store_project_and_attribute_data(data=experiment_data)
     base.close_session()
    def setUp(self):
        self.dbconfig = 'data/dbconfig.json'
        dbparam = read_dbconf_json(self.dbconfig)
        base = BaseAdaptor(**dbparam)
        self.engine = base.engine
        self.dbname = dbparam['dbname']
        Base.metadata.drop_all(self.engine)
        if os.path.exists(self.dbname):
            os.remove(self.dbname)
        Base.metadata.create_all(self.engine)
        self.session_class = base.get_session_class()
        self.temp_work_dir = get_temp_dir()
        self.temp_base_dir = get_temp_dir()
        self.input_list = ['a.cram', 'a.vcf.gz', 'b.tar.gz']
        for file_name in self.input_list:
            file_path = os.path.join(self.temp_work_dir, file_name)
            with open(file_path, 'w') as fq:
                fq.write('AAAA')  # create input files

        base = BaseAdaptor(**{'session_class': self.session_class})
        base.start_session()
        platform_data = [{
            "platform_igf_id": "M001",
            "model_name": "MISEQ",
            "vendor_name": "ILLUMINA",
            "software_name": "RTA",
            "software_version": "RTA1.18.54"
        }]  # platform data
        flowcell_rule_data = [{
            "platform_igf_id": "M001",
            "flowcell_type": "MISEQ",
            "index_1": "NO_CHANGE",
            "index_2": "NO_CHANGE"
        }]  # flowcell rule data
        pl = PlatformAdaptor(**{'session': base.session})
        pl.store_platform_data(data=platform_data)  # loading platform data
        pl.store_flowcell_barcode_rule(
            data=flowcell_rule_data)  # loading flowcell rules data
        project_data = [{'project_igf_id': 'ProjectA'}]  # project data
        pa = ProjectAdaptor(**{'session': base.session})
        pa.store_project_and_attribute_data(
            data=project_data)  # load project data
        sample_data = [{
            'sample_igf_id': 'SampleA',
            'project_igf_id': 'ProjectA'
        }]  # sample data
        sa = SampleAdaptor(**{'session': base.session})
        sa.store_sample_and_attribute_data(
            data=sample_data)  # store sample data
        seqrun_data = [{
            'seqrun_igf_id': 'SeqrunA',
            'flowcell_id': '000000000-D0YLK',
            'platform_igf_id': 'M001',
            'flowcell': 'MISEQ'
        }]  # seqrun data
        sra = SeqrunAdaptor(**{'session': base.session})
        sra.store_seqrun_and_attribute_data(
            data=seqrun_data)  # load seqrun data
        experiment_data = [{
            'experiment_igf_id': 'ExperimentA',
            'sample_igf_id': 'SampleA',
            'library_name': 'SampleA',
            'platform_name': 'MISEQ',
            'project_igf_id': 'ProjectA'
        }]  # experiment data
        ea = ExperimentAdaptor(**{'session': base.session})
        ea.store_project_and_attribute_data(
            data=experiment_data)  # load experiment data
        run_data = [{
            'run_igf_id': 'RunA',
            'experiment_igf_id': 'ExperimentA',
            'seqrun_igf_id': 'SeqrunA',
            'lane_number': '1'
        }]  # run data
        ra = RunAdaptor(**{'session': base.session})
        ra.store_run_and_attribute_data(data=run_data)  # load run data
        base.commit_session()
        base.close_session()
コード例 #22
0
 def setUp(self):
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     base.start_session()
     platform_data = [
         {
             "platform_igf_id": "M03291",
             "model_name": "MISEQ",
             "vendor_name": "ILLUMINA",
             "software_name": "RTA",
             "software_version": "RTA1.18.54"
         },
     ]
     flowcell_rule_data = [{
         "platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"
     }]
     pl = PlatformAdaptor(**{'session': base.session})
     pl.store_platform_data(data=platform_data)
     pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
     project_data = [{'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq'}]
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     sample_data = [{
         'sample_igf_id': 'IGF103923',
         'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq',
         'species_name': 'HG38'
     }]
     sa = SampleAdaptor(**{'session': base.session})
     sa.store_sample_and_attribute_data(data=sample_data)
     seqrun_data = [
         {
             'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
             'flowcell_id': '000000000-BRN47',
             'platform_igf_id': 'M03291',
             'flowcell': 'MISEQ'
         },
     ]
     sra = SeqrunAdaptor(**{'session': base.session})
     sra.store_seqrun_and_attribute_data(data=seqrun_data)
     pipeline_data = [
         {
             "pipeline_name": "PrimaryAnalysis",
             "pipeline_db": "sqlite:////bcl2fastq.db"
         },
         {
             "pipeline_name": "DemultiplexIlluminaFastq",
             "pipeline_db": "sqlite:////bcl2fastq.db"
         },
     ]
     pla = PipelineAdaptor(**{'session': base.session})
     pla.store_pipeline_data(data=pipeline_data)
     file_data = [
         {
             'file_path': '/path/S20180405S_S1_L001_R1_001.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1528121404'
         },
         {
             'file_path': '/path/S20180405S_S1_L001_R2_001.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1467047580'
         },
         {
             'file_path': '/path/S20180405S_S3_L001_R2_001.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1467047580'
         },
     ]
     fa = FileAdaptor(**{'session': base.session})
     fa.store_file_and_attribute_data(data=file_data)
     collection_data = [
         {
             'name': 'IGF103923_MISEQ_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'table': 'run'
         },
         {
             'name': 'IGF103923_MISEQ1_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'table': 'run'
         },
     ]
     collection_files_data = [
         {
             'name': 'IGF103923_MISEQ_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'file_path': '/path/S20180405S_S1_L001_R1_001.fastq.gz'
         },
         {
             'name': 'IGF103923_MISEQ_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'file_path': '/path/S20180405S_S1_L001_R2_001.fastq.gz'
         },
         {
             'name': 'IGF103923_MISEQ1_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'file_path': '/path/S20180405S_S3_L001_R2_001.fastq.gz'
         },
     ]
     ca = CollectionAdaptor(**{'session': base.session})
     ca.store_collection_and_attribute_data(data=collection_data)
     ca.create_collection_group(data=collection_files_data)
     experiment_data = [{
         'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq',
         'sample_igf_id': 'IGF103923',
         'experiment_igf_id': 'IGF103923_MISEQ',
         'library_name': 'IGF103923',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'TENX-TRANSCRIPTOME-3P',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ'
     }, {
         'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq',
         'sample_igf_id': 'IGF103923',
         'experiment_igf_id': 'IGF103923_MISEQ1',
         'library_name': 'IGF103923_1',
         'library_source': 'GENOMIC_SINGLE_CELL',
         'library_strategy': 'WGS',
         'experiment_type': 'UNKNOWN',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ'
     }]
     ea = ExperimentAdaptor(**{'session': base.session})
     ea.store_project_and_attribute_data(data=experiment_data)
     run_data = [{
         'experiment_igf_id': 'IGF103923_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'IGF103923_MISEQ_000000000-BRN47_1',
         'lane_number': '1'
     }, {
         'experiment_igf_id': 'IGF103923_MISEQ1',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'IGF103923_MISEQ1_000000000-BRN47_1',
         'lane_number': '1'
     }]
     ra = RunAdaptor(**{'session': base.session})
     ra.store_run_and_attribute_data(data=run_data)
     base.close_session()
コード例 #23
0
    def setUp(self):
        self.dbconfig = 'data/dbconfig.json'
        self.fastq_dir = 'data/collect_fastq_dir/sc_1_8'
        self.model_name = 'NEXTSEQ'
        self.flowcell_id = 'TESTABC'
        self.seqrun_igf_id = '171003_NB500000_0089_TESTABC'
        self.file_location = 'HPC_PROJECT'
        self.samplesheet_file = 'data/collect_fastq_dir/sc_1_8/SampleSheet.csv'
        self.samplesheet_filename = 'SampleSheet.csv'
        self.manifest_name = 'file_manifest.csv'
        dbparam = read_dbconf_json(self.dbconfig)
        base = BaseAdaptor(**dbparam)
        self.engine = base.engine
        self.dbname = dbparam['dbname']
        Base.metadata.create_all(self.engine)
        self.session_class = base.session_class
        base.start_session()
        platform_data = [{
            "platform_igf_id": "M00001",
            "model_name": "MISEQ",
            "vendor_name": "ILLUMINA",
            "software_name": "RTA",
            "software_version": "RTA1.18.54"
        }, {
            "platform_igf_id": "NB500000",
            "model_name": "NEXTSEQ",
            "vendor_name": "ILLUMINA",
            "software_name": "RTA",
            "software_version": "RTA2"
        }, {
            "platform_igf_id": "K00000",
            "model_name": "HISEQ4000",
            "vendor_name": "ILLUMINA",
            "software_name": "RTA",
            "software_version": "RTA2"
        }]
        flowcell_rule_data = [{
            "platform_igf_id": "K00000",
            "flowcell_type": "HiSeq 3000/4000 SR",
            "index_1": "NO_CHANGE",
            "index_2": "NO_CHANGE"
        }, {
            "platform_igf_id": "K00000",
            "flowcell_type": "HiSeq 3000/4000 PE",
            "index_1": "NO_CHANGE",
            "index_2": "REVCOMP"
        }, {
            "platform_igf_id": "NB500000",
            "flowcell_type": "NEXTSEQ",
            "index_1": "NO_CHANGE",
            "index_2": "REVCOMP"
        }, {
            "platform_igf_id": "M00001",
            "flowcell_type": "MISEQ",
            "index_1": "NO_CHANGE",
            "index_2": "NO_CHANGE"
        }]
        pl = PlatformAdaptor(**{'session': base.session})
        pl.store_platform_data(data=platform_data)
        pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
        project_data = [{
            'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
            'project_name': 'test_22-8-2017_rna',
            'description': 'Its project 1',
            'project_deadline': 'Before August 2017',
            'comments': 'Some samples are treated with drug X',
        }]
        pa = ProjectAdaptor(**{'session': base.session})
        pa.store_project_and_attribute_data(data=project_data)
        sample_data = [
            {
                'sample_igf_id': 'IGF00001',
                'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
            },
            {
                'sample_igf_id': 'IGF00002',
                'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
            },
        ]
        sa = SampleAdaptor(**{'session': base.session})
        sa.store_sample_and_attribute_data(data=sample_data)

        seqrun_data = [{
            'seqrun_igf_id': '171003_NB500000_0089_TESTABC',
            'flowcell_id': 'TESTABC',
            'platform_igf_id': 'NB500000',
            'flowcell': 'NEXTSEQ',
        }]
        sra = SeqrunAdaptor(**{'session': base.session})
        sra.store_seqrun_and_attribute_data(data=seqrun_data)
        base.close_session()
コード例 #24
0
    def _check_and_register_data(self, data, project_info_file):
        '''
    An internal method for checking and registering data

    :param data: A dictionary containing following keys
    
          project_data
          user_data
          project_user_data
          sample_data
    :param project_info_file: A filepath for project info
    '''
        try:
            db_connected = False
            project_data = pd.DataFrame(data['project_data'])
            user_data = pd.DataFrame(data['user_data'])
            project_user_data = pd.DataFrame(data['project_user_data'])
            sample_data = pd.DataFrame(data['sample_data'])
            base = BaseAdaptor(**{'session_class': self.session_class})
            base.start_session()  # connect_to db
            db_connected = True
            project_data = project_data[project_data[
                self.project_lookup_column].isnull() == False]
            project_data = project_data.drop_duplicates()
            if project_data.index.size > 0:
                project_data=project_data.\
                             apply(lambda x: \
                                   self._check_existing_data(\
                                      data=x,\
                                      dbsession=base.session, \
                                      table_name='project',
                                      check_column='EXISTS'),\
                                   axis=1)                                              # get project map
                project_data = project_data[project_data['EXISTS'] ==
                                            False]  # filter existing projects
                project_data.drop('EXISTS', axis=1,
                                  inplace=True)  # remove extra column

            user_data = user_data[user_data[self.user_lookup_column].isnull()
                                  == False]
            user_data = user_data.drop_duplicates()
            if user_data.index.size > 0:
                user_data=user_data.apply(lambda x: \
                                        self._assign_username_and_password(x), \
                                        axis=1)                                         # check for use account and password
                user_data=user_data.\
                          apply(lambda x: \
                                self._check_existing_data(\
                                      data=x,\
                                      dbsession=base.session, \
                                      table_name='user',
                                      check_column='EXISTS'),\
                                axis=1)                                                 # get user map
                user_data = user_data[user_data['EXISTS'] ==
                                      False]  # filter existing users
                user_data.drop('EXISTS', axis=1,
                               inplace=True)  # remove extra column

            sample_data = sample_data[sample_data[
                self.sample_lookup_column].isnull() == False]
            sample_data = sample_data.drop_duplicates()
            if sample_data.index.size > 0:
                sample_data=sample_data.\
                             apply(lambda x: \
                                   self._check_existing_data(\
                                      data=x,\
                                      dbsession=base.session, \
                                      table_name='sample',
                                      check_column='EXISTS'),\
                                   axis=1)                                              # get sample map
                sample_data = sample_data[sample_data['EXISTS'] ==
                                          False]  # filter existing samples
                sample_data.drop('EXISTS', axis=1,
                                 inplace=True)  # remove extra column

            project_user_data = project_user_data.drop_duplicates()
            project_user_data_mask=(project_user_data[self.project_lookup_column].isnull()==False) & \
                                   (project_user_data[self.user_lookup_column].isnull()==False)
            project_user_data = project_user_data[
                project_user_data_mask]  # not allowing any empty values for project or user lookup
            if project_user_data.index.size > 0:
                project_user_data = self._add_default_user_to_project(
                    project_user_data
                )  # update project_user_data with default users
                project_user_data=project_user_data.\
                                  apply(lambda x: \
                                   self._check_existing_data(\
                                      data=x,\
                                      dbsession=base.session, \
                                      table_name='project_user',
                                      check_column='EXISTS'),\
                                   axis=1)                                              # get project user map
                project_user_data = project_user_data[project_user_data[
                    'EXISTS'] == False]  # filter existing project user
                project_user_data.drop('EXISTS', axis=1,
                                       inplace=True)  # remove extra column

            if len(project_data.index) > 0:  # store new projects
                pa1 = ProjectAdaptor(**{'session': base.session
                                        })  # connect to project adaptor
                pa1.store_project_and_attribute_data(
                    data=project_data, autosave=False)  # load project data

            if len(user_data.index) > 0:  # store new users
                ua = UserAdaptor(**{'session': base.session})
                ua.store_user_data(data=user_data,
                                   autosave=False)  # load user data

            if len(project_user_data.index) > 0:  # store new project users
                pa2 = ProjectAdaptor(**{'session': base.session
                                        })  # connect to project adaptor
                project_user_data = project_user_data.to_dict(
                    orient='records')  # convert dataframe to dictionary
                pa2.assign_user_to_project(
                    data=project_user_data,
                    autosave=False)  # load project user data

            if len(sample_data.index) > 0:  # store new samples
                sa = SampleAdaptor(**{'session': base.session
                                      })  # connect to sample adaptor
                sa.store_sample_and_attribute_data(
                    data=sample_data, autosave=False)  # load samples data

            if self.setup_irods:
                user_data.apply(lambda x: self._setup_irods_account(data=x),
                                axis=1)  # create irods account

            file_checksum = calculate_file_checksum(filepath=project_info_file)
            file_size = os.path.getsize(project_info_file)
            file_data=[{'file_path':project_info_file,\
                        'location':'ORWELL',\
                        'md5':file_checksum,\
                        'size':file_size,\
                      }]
            fa = FileAdaptor(**{'session':
                                base.session})  # connect to file adaptor
            fa.store_file_data(data=file_data, autosave=False)

        except:
            if db_connected:
                base.rollback_session()  # rollback session
            raise
        else:
            if db_connected:
                base.commit_session()  # commit changes to db
                if len(user_data.index) > 0 and self.notify_user:
                    user_data.apply(lambda x: self._notify_about_new_user_account(x),\
                                    axis=1)                                               # send mail to new user with their password and forget it
        finally:
            if db_connected:
                base.close_session()  # close db connection
コード例 #25
0
 def setUp(self):
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     base.start_session()
     project_data = [{
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X',
     }]
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     sample_data = [
         {
             'sample_igf_id': 'IGF00001',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
             'library_strategy': 'RNA-SEQ',
             'experiment_type': 'POLYA-RNA'
         },
         {
             'sample_igf_id': 'IGF00003',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
             'experiment_type': 'POLYA-RNA'
         },
         {
             'sample_igf_id': 'IGF00002',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         },
     ]
     sa = SampleAdaptor(**{'session': base.session})
     sa.store_sample_and_attribute_data(data=sample_data)
     experiment_data = [
         {
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'sample_igf_id': 'IGF00001',
             'experiment_igf_id': 'IGF00001_HISEQ4000',
             'library_name': 'IGF00001'
         },
         {
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'sample_igf_id': 'IGF00003',
             'experiment_igf_id': 'IGF00003_HISEQ4000',
             'library_name': 'IGF00001'
         },
         {
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'sample_igf_id': 'IGF00002',
             'experiment_igf_id': 'IGF00002_HISEQ4000',
             'library_name': 'IGF00002'
         },
     ]
     ea = ExperimentAdaptor(**{'session': base.session})
     ea.store_project_and_attribute_data(data=experiment_data)
     pipeline_data = [{
         "pipeline_name": "alignment",
         "pipeline_db": "sqlite:////data/aln.db",
         "pipeline_init_conf": {
             "input_dir": "data/fastq_dir/",
             "output_dir": "data"
         },
         "pipeline_run_conf": {
             "output_dir": "data"
         }
     }]
     pl = PipelineAdaptor(**{'session': base.session})
     pl.store_pipeline_data(data=pipeline_data)
     pipeline_seed_data = [
         {
             'pipeline_name': 'alignment',
             'seed_id': '1',
             'seed_table': 'experiment'
         },
     ]
     pl.create_pipeline_seed(data=pipeline_seed_data)
     base.close_session()
コード例 #26
0
    def _check_existing_data(self,
                             data,
                             dbsession,
                             table_name,
                             check_column='EXISTS'):
        '''
    An internal function for checking and registering project info
    
    :param data: A pandas data series
    :param dbsession: A sqlalchemy database session object
    :param table_name: A database table name
    :param check_column: Column name for existing data
    '''
        try:
            if not isinstance(data, pd.Series):
                raise ValueError('Expecting a data series and got {0}'.format(
                    type(data)))
            if table_name == 'project':
                if self.project_lookup_column in data and \
                   not pd.isnull(data[self.project_lookup_column]):
                    project_igf_id = data[self.project_lookup_column]
                    pa = ProjectAdaptor(**{'session': dbsession
                                           })  # connect to project adaptor
                    project_exists = pa.check_project_records_igf_id(
                        project_igf_id)
                    if project_exists:  # store data only if project is not existing
                        data[check_column] = True
                    else:
                        data[check_column] = False
                    return data
                else:
                    raise ValueError('Missing or empty required column {0}'.\
                                     format(self.project_lookup_column))
            elif table_name == 'user':
                if self.user_lookup_column in data and \
                   not pd.isnull(data[self.user_lookup_column]):
                    user_email = data[self.user_lookup_column]
                    ua = UserAdaptor(**{'session':
                                        dbsession})  # connect to user adaptor
                    user_exists = ua.check_user_records_email_id(
                        email_id=user_email)
                    if user_exists:  # store data only if user is not existing
                        data[check_column] = True
                    else:
                        data[check_column] = False
                    return data
                else:
                    raise ValueError('Missing or empty required column {0}'.\
                                     format(self.user_lookup_column))
            elif table_name == 'sample':
                if self.sample_lookup_column in data and \
                   not pd.isnull(data[self.sample_lookup_column]):
                    project_igf_id = data[self.project_lookup_column]
                    sample_igf_id = data[self.sample_lookup_column]
                    sa = SampleAdaptor(**{'session': dbsession
                                          })  # connect to sample adaptor
                    sample_project_exists=sa.check_project_and_sample(project_igf_id=project_igf_id,\
                                                                      sample_igf_id=sample_igf_id) # check for existing sample_id and project-id combination
                    if sample_project_exists:  # store data only if sample is not existing
                        data[check_column] = True
                    else:
                        sample_exists = sa.check_sample_records_igf_id(
                            sample_igf_id)  # check for existing sample
                        if sample_exists:
                            raise ValueError('Sample {0} exists in database but not associated with project {1}'.\
                                             format(sample_igf_id,project_igf_id))            # inconsistency in sample project combination
                        data[check_column] = False
                    return data
                else:
                    raise ValueError('Missing or empty required column {0}'.\
                                     format(self.sample_lookup_column))
            elif table_name == 'project_user':
                if self.user_lookup_column in data and \
                    not pd.isnull(data[self.user_lookup_column]) and \
                   self.project_lookup_column in data and \
                    not pd.isnull(data[self.project_lookup_column]):
                    project_igf_id = data[self.project_lookup_column]
                    user_email = data[self.user_lookup_column]
                    pa = ProjectAdaptor(**{'session': dbsession
                                           })  # connect to project adaptor
                    project_user_exists=pa.check_existing_project_user(project_igf_id,\
                                                                       email_id=user_email)
                    if user_email != self.default_user_email and \
                       (self.data_authority_column not in data or \
                       pd.isnull(data[self.data_authority_column])):
                        data[
                            self.
                            data_authority_column] = True  # set user as data authority, filter default user

                    if project_user_exists:  # store data only if sample is not existing
                        data[check_column] = True
                    else:
                        data[check_column] = False
                    return data
                else:
                    raise ValueError('Missing or empty required column {0}, {1}'.\
                                     format(self.project_lookup_column,\
                                            self.user_lookup_column))
            else:
                raise ValueError('table {0} not supported'.format(table_name))
        except:
            raise
コード例 #27
0
    def setUp(self):
        self.path = 'data/seqrun_dir'
        self.dbconfig = 'data/dbconfig.json'
        self.md5_out_path = 'data/md5_dir'
        self.pipeline_name = 'demultiplexing_fastq'

        seqrun_json = 'data/seqrun_db_data.json'
        platform_json = 'data/platform_db_data.json'
        pipeline_json = 'data/pipeline_data.json'

        os.mkdir(self.md5_out_path)
        dbparam = None
        with open(self.dbconfig, 'r') as json_data:
            dbparam = json.load(json_data)
        base = BaseAdaptor(**dbparam)
        self.engine = base.engine
        self.dbname = dbparam['dbname']
        self.pipeline_name = ''
        Base.metadata.create_all(self.engine)
        base.start_session()
        user_data = [
            {
                'name': 'user1',
                'email_id': '*****@*****.**',
                'username': '******'
            },
        ]
        ua = UserAdaptor(**{'session': base.session})
        ua.store_user_data(data=user_data)
        project_data = [{
            'project_igf_id': 'project_1',
            'project_name': 'test_22-8-2017_rna',
            'description': 'Its project 1',
            'project_deadline': 'Before August 2017',
            'comments': 'Some samples are treated with drug X',
        }]
        pa = ProjectAdaptor(**{'session': base.session})
        pa.store_project_and_attribute_data(data=project_data)
        project_user_data = [{
            'project_igf_id': 'project_1',
            'email_id': '*****@*****.**',
            'data_authority': True
        }]
        pa.assign_user_to_project(data=project_user_data)
        sample_data = [
            {
                'sample_igf_id': 'IGF0001',
                'project_igf_id': 'project_1',
            },
            {
                'sample_igf_id': 'IGF0002',
                'project_igf_id': 'project_1',
            },
            {
                'sample_igf_id': 'IGF0003',
                'project_igf_id': 'project_1',
            },
        ]
        sa = SampleAdaptor(**{'session': base.session})
        sa.store_sample_and_attribute_data(data=sample_data)
        base.commit_session()
        with open(pipeline_json,
                  'r') as json_data:  # store pipeline data to db
            pipeline_data = json.load(json_data)
            pa = PipelineAdaptor(**{'session': base.session})
            pa.store_pipeline_data(data=pipeline_data)

        with open(platform_json,
                  'r') as json_data:  # store platform data to db
            platform_data = json.load(json_data)
            pl = PlatformAdaptor(**{'session': base.session})
            pl.store_platform_data(data=platform_data)

        with open(seqrun_json, 'r') as json_data:  # store seqrun data to db
            seqrun_data = json.load(json_data)
            sra = SeqrunAdaptor(**{'session': base.session})
            sra.store_seqrun_and_attribute_data(data=seqrun_data)
            base.close_session()
コード例 #28
0
  def run(self):
    try:
      seqrun_igf_id = self.param_required('seqrun_igf_id')
      project_name = self.param_required('project_name')
      seqrun_date = self.param_required('seqrun_date')
      flowcell_id = self.param_required('flowcell_id')
      igf_session_class = self.param_required('igf_session_class')
      template_dir = self.param_required('template_dir')
      email_template_path = self.param('email_template_path')
      email_template = self.param('email_template')
      sendmail_exe = self.param('sendmail_exe')
      use_ephemeral_space = self.param('use_ephemeral_space')
      hpcUser = False                                                           # default value for hpc users

      pa = ProjectAdaptor(**{'session_class':igf_session_class})
      pa.start_session()
      user_info = pa.get_project_user_info(project_igf_id=project_name)         # fetch user info from db
      pa.close_session()

      user_info = user_info[user_info['data_authority']=='T']                   # filter dataframe for data authority
      user_info = user_info.to_dict(orient='records')                           # convert dataframe to list of dictionaries
      if len(user_info) == 0:
        raise ValueError('No user found for project {0}'.format(project_name))

      user_info = user_info[0]
      user_name = user_info['name']                                             # get username for irods
      login_name = user_info['username']
      user_email = user_info['email_id']
      user_category = user_info['category']
      if user_category=='HPC_USER':
        hpcUser = True                                                          # set value for hpc user
        message = 'loading hpc user specific settings for {0}:{1}'.\
                  format(user_name,login_name)
        self.post_message_to_slack(message,reaction='pass')                     # send message to slack

      email_template_path = \
        os.path.join(\
          template_dir,
          email_template_path)
      template_env = \
        Environment(\
          loader=FileSystemLoader(\
                   searchpath=email_template_path),
          autoescape=select_autoescape(['html','xml']))                         # set template env
      template_file = template_env.get_template(email_template)
      temp_work_dir = \
        get_temp_dir(use_ephemeral_space=use_ephemeral_space)                   # get a temp dir
      report_output_file = \
        os.path.join(\
          temp_work_dir,
          email_template)
      template_file.\
        stream(\
          projectName=project_name,
          customerEmail=user_email,
          customerName=user_name,
          customerUsername=login_name,
          projectRunDate=seqrun_date,
          flowcellId=flowcell_id,
          hpcUser=hpcUser).\
        dump(report_output_file)
      proc = \
        subprocess.\
          Popen(\
            ['cat',
             report_output_file
            ],
            stdout=subprocess.PIPE)
      sendmail_cmd = \
        [sendmail_exe,
         '-t',
        ]
      subprocess.\
        check_call(\
          sendmail_cmd,
          stdin=proc.stdout)
      proc.stdout.close()
      remove_dir(temp_work_dir)
      message = \
        'finished data processing for seqrun: {0}, project: {1}, sent mail to igf'.\
        format(seqrun_igf_id, project_name)
      self.post_message_to_slack(message,reaction='pass')
    except Exception as e:
      message = \
        'seqrun: {2}, Error in {0}: {1}'.\
         format(\
           self.__class__.__name__,
           e,
           seqrun_igf_id)
      self.warning(message)
      self.post_message_to_slack(message,reaction='fail')                       # post msg to slack for failed jobs
      raise
コード例 #29
0
    def load_file_to_disk_and_db(self,
                                 input_file_list,
                                 withdraw_exisitng_collection=True,
                                 autosave_db=True,
                                 file_suffix=None,
                                 force=True,
                                 remove_file=False):
        '''
    A method for loading analysis results to disk and database. File will be moved to a new path if base_path is present.
    Directory structure of the final path is based on the collection_table information.
    
    Following will be the final directory structure if base_path is present
    
    project - base_path/project_igf_id/analysis_name
    sample - base_path/project_igf_id/sample_igf_id/analysis_name
    experiment - base_path/project_igf_id/sample_igf_id/experiment_igf_id/analysis_name
    run - base_path/project_igf_id/sample_igf_id/experiment_igf_id/run_igf_id/analysis_name
    
    :param input_file_list: A list of input file to load, all using the same collection info
    :param withdraw_exisitng_collection: Remove existing collection group, DO NOT use this while loading a list of files
    :param autosave_db: Save changes to database, default True
    :param file_suffix: Use a specific file suffix, use None if it should be same as original file
                        e.g. input.vcf.gz to  output.vcf.gz
    :param force: Toggle for removing existing file, default True
    :param remove_file: A toggle for removing existing file from disk, default False
    :returns: A list of final filepath
    '''
        try:
            project_igf_id = None
            sample_igf_id = None
            experiment_igf_id = None
            experiment_igf_id = None
            run_igf_id = None
            output_path_list = list()  # define empty output list
            dbconnected = False
            if self.collection_name is None or \
               self.collection_type is None or \
               self.collection_table is None:
                raise ValueError('File collection information is incomplete'
                                 )  # check for collection information

            base = BaseAdaptor(**{'session_class': self.dbsession_class})
            base.start_session()  # connect to db
            dbconnected = True
            if self.base_path is not None:
                if self.collection_table == 'sample':
                    sa = SampleAdaptor(**{'session': base.session})
                    sample_igf_id = self.collection_name
                    sample_exists = sa.check_sample_records_igf_id(
                        sample_igf_id=sample_igf_id)
                    if not sample_exists:
                        raise ValueError('Sample {0} not found in db'.\
                                         format(sample_igf_id))

                    project_igf_id = \
                      sa.fetch_sample_project(sample_igf_id=sample_igf_id)                # fetch project id for sample
                elif self.collection_table == 'experiment':
                    ea = ExperimentAdaptor(**{'session': base.session})
                    experiment_igf_id = self.collection_name
                    experiment_exists = \
                      ea.check_experiment_records_id(
                        experiment_igf_id=experiment_igf_id)
                    if not experiment_exists:
                        raise ValueError('Experiment {0} not present in database'.\
                                         format(experiment_igf_id))

                    (project_igf_id,sample_igf_id) = \
                        ea.fetch_project_and_sample_for_experiment(
                          experiment_igf_id=experiment_igf_id)                            # fetch project and sample id for experiment
                elif self.collection_table == 'run':
                    ra = RunAdaptor(**{'session': base.session})
                    run_igf_id = self.collection_name
                    run_exists = ra.check_run_records_igf_id(
                        run_igf_id=run_igf_id)
                    if not run_exists:
                        raise ValueError('Run {0} not found in database'.\
                                         format(run_igf_id))

                    (project_igf_id,sample_igf_id,experiment_igf_id) = \
                      ra.fetch_project_sample_and_experiment_for_run(
                        run_igf_id=run_igf_id)                                            # fetch project, sample and experiment id for run
                elif self.collection_table == 'project':
                    pa = ProjectAdaptor(**{'session': base.session})
                    project_igf_id = self.collection_name
                    project_exists = \
                      pa.check_project_records_igf_id(
                        project_igf_id=project_igf_id)
                    if not project_exists:
                        raise ValueError('Project {0} not found in database'.\
                                         format(project_igf_id))

            if self.rename_file and self.analysis_name is None:
                raise ValueError('Analysis name is required for renaming file'
                                 )  # check analysis name

            for input_file in input_file_list:
                final_path = ''
                if self.base_path is None:  # do not move file if base_path is absent
                    final_path = os.path.dirname(input_file)
                else:  # move file path
                    if self.collection_table == 'project':
                        if project_igf_id is None:
                            raise ValueError('Missing project id for collection {0}'.\
                                             format(self.collection_name))

                        final_path = \
                          os.path.join(
                            self.base_path,
                            project_igf_id,
                            self.analysis_name)                                             # final path for project
                    elif self.collection_table == 'sample':
                        if project_igf_id is None or \
                           sample_igf_id is None:
                            raise ValueError('Missing project and sample id for collection {0}'.\
                                             format(self.collection_name))

                        final_path = \
                          os.path.join(
                            self.base_path,
                            project_igf_id,
                            sample_igf_id,
                            self.analysis_name)                                             # final path for sample
                    elif self.collection_table == 'experiment':
                        if project_igf_id is None or \
                           sample_igf_id is None or \
                           experiment_igf_id is None:
                            raise ValueError('Missing project,sample and experiment id for collection {0}'.\
                                             format(self.collection_name))

                        final_path = \
                          os.path.join(
                            self.base_path,
                            project_igf_id,
                            sample_igf_id,
                            experiment_igf_id,
                            self.analysis_name)                                             # final path for experiment
                    elif self.collection_table == 'run':
                        if project_igf_id is None or \
                           sample_igf_id is None or \
                           experiment_igf_id is None or \
                           run_igf_id is None:
                            raise ValueError('Missing project,sample,experiment and run id for collection {0}'.\
                                             format(self.collection_name))

                        final_path = \
                          os.path.join(\
                            self.base_path,
                            project_igf_id,
                            sample_igf_id,
                            experiment_igf_id,
                            run_igf_id,
                            self.analysis_name)                                             # final path for run

                if self.rename_file:
                    new_filename = \
                      self.get_new_file_name(
                        input_file=input_file,
                        file_suffix=file_suffix)
                    final_path = \
                      os.path.join(
                        final_path,
                        new_filename)                                                     # get new filepath
                else:
                    final_path = \
                      os.path.join(
                        final_path,
                        os.path.basename(input_file))

                if final_path != input_file:  # move file if its required
                    final_path = preprocess_path_name(
                        input_path=final_path
                    )  # remove unexpected characters from file path
                    move_file(source_path=input_file,
                              destinationa_path=final_path,
                              force=force
                              )  # move or overwrite file to destination dir

                output_path_list.append(
                    final_path)  # add final path to the output list
                self.create_or_update_analysis_collection(
                    file_path=final_path,
                    dbsession=base.session,
                    withdraw_exisitng_collection=withdraw_exisitng_collection,
                    remove_file=remove_file,
                    autosave_db=autosave_db)  # load new file collection in db
                if autosave_db:
                    base.commit_session()  # save changes to db for each file

            base.commit_session()  # save changes to db
            base.close_session()  # close db connection
            return output_path_list
        except:
            if dbconnected:
                base.rollback_session()
                base.close_session()
            raise
 def setUp(self):
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     base.start_session()
     platform_data = [{
         "platform_igf_id": "M03291",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"
     }, {
         "platform_igf_id": "NB501820",
         "model_name": "NEXTSEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"
     }, {
         "platform_igf_id": "K00345",
         "model_name": "HISEQ4000",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"
     }]
     flowcell_rule_data = [{
         "platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 SR",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"
     }, {
         "platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 PE",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"
     }, {
         "platform_igf_id": "NB501820",
         "flowcell_type": "NEXTSEQ",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"
     }, {
         "platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"
     }]
     pl = PlatformAdaptor(**{'session': base.session})
     pl.store_platform_data(data=platform_data)
     pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
     seqrun_data = [{
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'flowcell_id': '000000000-BRN47',
         'platform_igf_id': 'M03291',
         'flowcell': 'MISEQ',
     }, {
         'seqrun_igf_id': '180416_NB03291_013_000000001-BRN47',
         'flowcell_id': '000000001-BRN47',
         'platform_igf_id': 'NB501820',
         'flowcell': 'NEXTSEQ',
     }]
     sra = SeqrunAdaptor(**{'session': base.session})
     sra.store_seqrun_and_attribute_data(data=seqrun_data)
     project_data = [{'project_igf_id': 'projectA'}]
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     sample_data = [
         {
             'sample_igf_id': 'sampleA',
             'project_igf_id': 'projectA',
             'species_name': 'HG38'
         },
         {
             'sample_igf_id': 'sampleB',
             'project_igf_id': 'projectA',
             'species_name': 'UNKNOWN'
         },
     ]
     sa = SampleAdaptor(**{'session': base.session})
     sa.store_sample_and_attribute_data(data=sample_data)
     experiment_data = [
         {
             'project_igf_id': 'projectA',
             'sample_igf_id': 'sampleA',
             'experiment_igf_id': 'sampleA_MISEQ',
             'library_name': 'sampleA',
             'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
             'library_strategy': 'RNA-SEQ',
             'experiment_type': 'TENX-TRANSCRIPTOME-3P',
             'library_layout': 'PAIRED',
             'platform_name': 'MISEQ',
         },
         {
             'project_igf_id': 'projectA',
             'sample_igf_id': 'sampleA',
             'experiment_igf_id': 'sampleA_NEXTSEQ',
             'library_name': 'sampleA',
             'library_source': 'UNKNOWN',
             'library_strategy': 'RNA-SEQ',
             'experiment_type': 'TENX-TRANSCRIPTOME-3P',
             'library_layout': 'PAIRED',
             'platform_name': 'NEXTSEQ',
         },
         {
             'project_igf_id': 'projectA',
             'sample_igf_id': 'sampleB',
             'experiment_igf_id': 'sampleB_MISEQ',
             'library_name': 'sampleB',
             'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
             'library_strategy': 'RNA-SEQ',
             'experiment_type': 'TENX-TRANSCRIPTOME-3P',
             'library_layout': 'PAIRED',
             'platform_name': 'MISEQ',
         },
     ]
     ea = ExperimentAdaptor(**{'session': base.session})
     ea.store_project_and_attribute_data(data=experiment_data)
     run_data = [{
         'experiment_igf_id': 'sampleA_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'sampleA_MISEQ_000000000-BRN47_1',
         'lane_number': '1'
     }, {
         'experiment_igf_id': 'sampleA_NEXTSEQ',
         'seqrun_igf_id': '180416_NB03291_013_000000001-BRN47',
         'run_igf_id': 'sampleA_NEXTSEQ_000000001-BRN47_2',
         'lane_number': '2'
     }, {
         'experiment_igf_id': 'sampleB_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'sampleB_MISEQ_HVWN7BBXX_1',
         'lane_number': '1'
     }]
     ra = RunAdaptor(**{'session': base.session})
     ra.store_run_and_attribute_data(data=run_data)
     file_data = [
         {
             'file_path':
             '/path/sampleA_MISEQ_000000000-BRN47_1_R1.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1528121404',
         },
         {
             'file_path':
             '/path/sampleA_NEXTSEQ_000000001-BRN47_2_R1.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1528121404',
         },
         {
             'file_path': '/path/sampleB_MISEQ_HVWN7BBXX_1_R1.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1528121404',
         },
     ]
     fa = FileAdaptor(**{'session': base.session})
     fa.store_file_and_attribute_data(data=file_data)
     collection_data = [{
         'name': 'sampleA_MISEQ_000000000-BRN47_1',
         'type': 'demultiplexed_fastq',
         'table': 'run'
     }, {
         'name': 'sampleA_NEXTSEQ_000000001-BRN47_2',
         'type': 'demultiplexed_fastq',
         'table': 'run'
     }, {
         'name': 'sampleB_MISEQ_HVWN7BBXX_1',
         'type': 'demultiplexed_fastq',
         'table': 'run'
     }]
     collection_files_data = [{
         'name':
         'sampleA_MISEQ_000000000-BRN47_1',
         'type':
         'demultiplexed_fastq',
         'file_path':
         '/path/sampleA_MISEQ_000000000-BRN47_1_R1.fastq.gz'
     }, {
         'name':
         'sampleA_NEXTSEQ_000000001-BRN47_2',
         'type':
         'demultiplexed_fastq',
         'file_path':
         '/path/sampleA_NEXTSEQ_000000001-BRN47_2_R1.fastq.gz'
     }, {
         'name':
         'sampleB_MISEQ_HVWN7BBXX_1',
         'type':
         'demultiplexed_fastq',
         'file_path':
         '/path/sampleB_MISEQ_HVWN7BBXX_1_R1.fastq.gz'
     }]
     ca = CollectionAdaptor(**{'session': base.session})
     ca.store_collection_and_attribute_data(data=collection_data)
     ca.create_collection_group(data=collection_files_data)
     base.close_session()