def test_fetch_all_project_igf_ids(self):
     '''
     Check that fetch_all_project_igf_ids returns exactly the two expected
     project ids in its 'project_igf_id' column
     '''
     pa = ProjectAdaptor(**{'session_class': self.session_class})
     pa.start_session()
     try:
         project_list = pa.fetch_all_project_igf_ids()
     finally:
         pa.close_session()  # release the session even if the fetch fails
     project_ids = project_list['project_igf_id'].values
     # assertIn reports the actual ids on failure, unlike assertTrue(x in y)
     self.assertIn('IGFP0002_test_22-8-2017_rna', project_ids)
     self.assertIn('IGFP0001_test_22-8-2017_rna', project_ids)
     self.assertEqual(len(project_ids), 2)
 def test_check_data_authority_for_project(self):
     '''
     Check that check_data_authority_for_project is truthy for project
     IGFP0001 and falsy for project IGFP0002
     '''
     pa = ProjectAdaptor(**{'session_class': self.session_class})
     pa.start_session()
     try:
         pa_results1 = pa.check_data_authority_for_project(
             project_igf_id='IGFP0001_test_22-8-2017_rna')
         self.assertTrue(pa_results1)
         pa_results2 = pa.check_data_authority_for_project(
             project_igf_id='IGFP0002_test_22-8-2017_rna')
         self.assertFalse(pa_results2)
     finally:
         pa.close_session()  # close the session even when an assertion fails
 def test_fetch_data_authority_for_project(self):
     '''
     Check that fetch_data_authority_for_project returns a user record with
     the expected email for project IGFP0001 and None for project IGFP0002
     '''
     pa = ProjectAdaptor(**{'session_class': self.session_class})
     pa.start_session()
     try:
         pa_results1 = pa.fetch_data_authority_for_project(
             project_igf_id='IGFP0001_test_22-8-2017_rna')
         self.assertEqual(pa_results1.email_id, '*****@*****.**')
         pa_results2 = pa.fetch_data_authority_for_project(
             project_igf_id='IGFP0002_test_22-8-2017_rna')
         self.assertIsNone(pa_results2)  # identity check, same as callers' "is None" test
     finally:
         pa.close_session()  # close the session even when an assertion fails
  def run(self):
    '''
    A ehive runnable method for uploading analysis files to irods server

    :param project_igf_id: Name of the project
    :param igf_session_class: A db session class used for the data authority lookup
    :param irods_exe_dir: Irods executable directory
    :param file_list: A list of file paths to upload to irods
    :param analysis_name: A string for analysis name, default is 'default'
    :param dir_path_list: A list of directory structure for irod server, default None for using datestamp
    :param file_tag: A text string for adding tag to collection, default None for only project_name
    :raises ValueError: if no data authority user is found for the project
    :raises IOError: if any path in file_list does not exist
    '''
    project_igf_id = None                                                       # bound before try so the error handler can always format its message
    try:
      project_igf_id = self.param_required('project_igf_id')
      igf_session_class = self.param_required('igf_session_class')
      irods_exe_dir = self.param_required('irods_exe_dir')
      file_list = self.param_required('file_list')
      analysis_name = self.param_required('analysis_name')
      dir_path_list = self.param_required('dir_path_list')
      file_tag = self.param_required('file_tag')

      pa = ProjectAdaptor(**{'session_class':igf_session_class})
      pa.start_session()
      try:
        user = \
          pa.fetch_data_authority_for_project(
            project_igf_id=project_igf_id)                                      # fetch user info from db
      finally:
        pa.close_session()                                                      # close session even if the lookup fails

      if user is None:
        raise ValueError('No user found for project {0}'.format(project_igf_id))

      username = user.username                                                  # get username for irods
      irods_upload = IGF_irods_uploader(irods_exe_dir)                          # create instance for irods upload
      for file_path in file_list:                                               # validate every path before uploading anything
        if not os.path.exists(file_path):
          raise IOError('Failed to find file {0} for irods upload'.\
                        format(file_path))

      irods_upload.\
        upload_analysis_results_and_create_collection(
          file_list=file_list,
          irods_user=username,
          project_name=project_igf_id,
          analysis_name=analysis_name,
          dir_path_list=dir_path_list,
          file_tag=file_tag)                                                    # upload analysis results to irods and build collection
    except Exception as e:
      message = \
        'project: {2}, Error in {0}: {1}'.format(
          self.__class__.__name__,
          e,
          project_igf_id)
      self.warning(message)
      self.post_message_to_slack(message,reaction='fail')                       # post msg to slack for failed jobs
      raise
 def test_count_project_samples(self):
     '''
     Check count_project_samples for two projects, including the count
     obtained with only_active=False
     '''
     pa = ProjectAdaptor(**{'session_class': self.session_class})
     pa.start_session()
     try:
         sample1 = pa.count_project_samples(
             project_igf_id='IGFP0001_test_22-8-2017_rna')
         self.assertEqual(sample1, 3)
         sample2 = pa.count_project_samples(
             project_igf_id='IGFP0002_test_22-8-2017_rna')
         self.assertEqual(sample2, 0)
         sample3 = pa.count_project_samples(
             project_igf_id='IGFP0001_test_22-8-2017_rna', only_active=False)
         self.assertEqual(sample3, 4)
     finally:
         pa.close_session()  # close the session even when an assertion fails
Ejemplo n.º 6
0
def mark_project_barcode_check_off(project_igf_id,
                                   session_class,
                                   barcode_check_attribute='barcode_check',
                                   barcode_check_val='OFF'):
    '''
    A utility method for marking project barcode check as off using the project_igf_id

    An existing attribute record is updated in place, otherwise a new one is
    stored. The change is committed on success and rolled back on any error.

    :param project_igf_id: A project_igf_id string
    :param session_class: A db session class object
    :param barcode_check_attribute: A text keyword for barcode check attribute, default barcode_check
    :param barcode_check_val: A text for barcode check attribute value, default is 'OFF'
    '''
    pr = ProjectAdaptor(**{'session_class': session_class})
    pr.start_session()  # a failure here leaves no session to roll back or close
    try:
        pr_attributes = pr.check_project_attributes(
            project_igf_id=project_igf_id,
            attribute_name=barcode_check_attribute
        )  # check for the existing project attribute
        if pr_attributes:  # if attribute present, then modify it
            project = pr.fetch_project_records_igf_id(
                project_igf_id=project_igf_id)  # fetch project info
            pr.session.\
              query(Project_attribute).\
              filter(Project_attribute.attribute_name==barcode_check_attribute).\
              filter(Project_attribute.project_id==project.project_id).\
              update({Project_attribute.attribute_value:barcode_check_val,},
                     synchronize_session=False)  # bulk update of the matching attribute records
        else:  # if project attribute is not present, store it
            data = [{
                'project_igf_id': project_igf_id,
                'attribute_name': barcode_check_attribute,
                'attribute_value': barcode_check_val
            }]  # create data structure for the attribute table
            pr.store_project_attributes(
                data, autosave=False
            )  # store data to attribute table without auto commit

        pr.commit_session()
    except BaseException:  # same scope as the former bare except: roll back on any failure
        pr.rollback_session()
        raise
    finally:
        pr.close_session()
Ejemplo n.º 7
0
def get_project_read_count(project_igf_id,
                           session_class,
                           run_attribute_name='R1_READ_COUNT',
                           active_status='ACTIVE'):
    '''
    A utility method for fetching sample read counts for an input project_igf_id

    :param project_igf_id: A project_igf_id string
    :param session_class: A db session class object
    :param run_attribute_name: Attribute name from Run_attribute table for read count lookup
    :param active_status: text label for active runs, default ACTIVE
    :returns: A pandas dataframe containing following columns, or an empty
              dataframe without columns if no record is found

               project_igf_id
               sample_igf_id
               flowcell_id
               attribute_value
    '''
    pr = ProjectAdaptor(**{'session_class': session_class})
    pr.start_session()
    try:
        # the join conditions already link the tables, so only the real
        # selection criteria are kept as filters
        query = \
          pr.session.\
            query(Project.project_igf_id,
                  Sample.sample_igf_id,
                  Seqrun.flowcell_id,
                  Run_attribute.attribute_value).\
            join(Sample,Project.project_id==Sample.project_id).\
            join(Experiment,Sample.sample_id==Experiment.sample_id).\
            join(Run,Experiment.experiment_id==Run.experiment_id).\
            join(Seqrun,Seqrun.seqrun_id==Run.seqrun_id).\
            join(Run_attribute,Run.run_id==Run_attribute.run_id).\
            filter(Project.project_igf_id==project_igf_id).\
            filter(Run_attribute.attribute_name==run_attribute_name).\
            filter(Run.status==active_status).\
            filter(Experiment.status==active_status).\
            filter(Sample.status==active_status)
        results = pr.fetch_records(query=query)
    finally:
        pr.close_session()  # close the session even if the query fails
    if len(results.index) > 0:
        return results
    return pd.DataFrame()  # no rows found: return a plain empty dataframe
 def test_fetch_project_samples(self):
     '''
     Check the number of rows returned by fetch_project_samples in dataframe
     mode for two projects, including the result with only_active=False
     '''
     pa = ProjectAdaptor(**{'session_class': self.session_class})
     pa.start_session()
     try:
         sample1 = pa.fetch_project_samples(
             project_igf_id='IGFP0001_test_22-8-2017_rna',
             output_mode='dataframe')
         self.assertEqual(len(sample1.index), 3)
         sample2 = pa.fetch_project_samples(
             project_igf_id='IGFP0002_test_22-8-2017_rna',
             output_mode='dataframe')
         self.assertEqual(len(sample2.index), 0)
         sample3 = pa.fetch_project_samples(
             project_igf_id='IGFP0001_test_22-8-2017_rna',
             only_active=False,
             output_mode='dataframe')
         self.assertEqual(len(sample3.index), 4)
     finally:
         pa.close_session()  # close the session even when an assertion fails
Ejemplo n.º 9
0
    def test_mark_project_barcode_check_off(self):
        '''
        Check mark_project_barcode_check_off for three projects: one without a
        barcode_check attribute, one expected to be 'ON' and one already 'OFF'.
        After each call the stored attribute values are read back and compared.
        '''
        pr = ProjectAdaptor(**{'session_class': self.session_class})

        def check_barcode_values(expected_values, attribute_must_exist_for=None):
            # Read barcode_check records in one session and compare every
            # stored value with the expected one; the session is always closed
            pr.start_session()
            try:
                if attribute_must_exist_for is not None:
                    attribute_check = pr.check_project_attributes(
                        project_igf_id=attribute_must_exist_for,
                        attribute_name='barcode_check')
                    self.assertTrue(attribute_check)
                for project_igf_id, expected_value in expected_values:
                    pr_attributes = pr.get_project_attributes(
                        project_igf_id=project_igf_id,
                        attribute_name='barcode_check')
                    for pr_attribute in pr_attributes.to_dict(orient='records'):
                        self.assertEqual(pr_attribute['attribute_value'],
                                         expected_value)
            finally:
                pr.close_session()

        pr.start_session()
        try:
            pr.store_project_and_attribute_data(self.data)
        finally:
            pr.close_session()

        mark_project_barcode_check_off(
            project_igf_id='IGFP001_test1_24-1-18',
            session_class=self.session_class)  # no attribute record
        check_barcode_values(
            [('IGFP001_test1_24-1-18', 'OFF'),
             ('IGFP002_test1_24-1-18', 'ON')],
            attribute_must_exist_for='IGFP001_test1_24-1-18')

        mark_project_barcode_check_off(
            project_igf_id='IGFP002_test1_24-1-18',
            session_class=self.session_class)  # barcode check ON
        check_barcode_values(
            [('IGFP002_test1_24-1-18', 'OFF'),
             ('IGFP003_test1_24-1-18', 'OFF')])

        mark_project_barcode_check_off(
            project_igf_id='IGFP003_test1_24-1-18',
            session_class=self.session_class)  # barcode check OFF
        check_barcode_values(
            [('IGFP003_test1_24-1-18', 'OFF')])
Ejemplo n.º 10
0
def get_seqrun_info_for_project(project_igf_id, session_class):
    '''
    A utility method for fetching seqrun_igf_id and flowcell_id which are linked
    to a specific project_igf_id

    :param project_igf_id: A project_igf_id string
    :param session_class: A db session class object
    :returns: A pandas dataframe containing following columns, or an empty
              dataframe without columns if no record is found

               seqrun_igf_id
               flowcell_id
    '''
    pr = ProjectAdaptor(**{'session_class': session_class})
    pr.start_session()
    try:
        # the join conditions already link the tables, so the project id is
        # the only filter needed
        query = \
          pr.session.\
            query(distinct(Seqrun.seqrun_igf_id).label('seqrun_igf_id'),
                  Seqrun.flowcell_id).\
            join(Run,Seqrun.seqrun_id==Run.seqrun_id).\
            join(Experiment,Experiment.experiment_id==Run.experiment_id).\
            join(Sample,Sample.sample_id==Experiment.sample_id).\
            join(Project,Project.project_id==Sample.project_id).\
            filter(Project.project_igf_id==project_igf_id)
        results = pr.fetch_records(query=query)
    finally:
        pr.close_session()  # close the session even if the query fails
    if len(results.index) > 0:
        return results
    return pd.DataFrame()  # no rows found: return a plain empty dataframe
  def run(self):
    '''
    A ehive runnable method for packing fastq files, samplesheet, report and
    manifest of a project into a tar file and uploading it to irods server

    :param fastq_dir: Source directory which is searched recursively for files
    :param seqrun_igf_id: Seqrun id, its first '_' separated field is parsed as a yymmdd date
    :param project_name: Name of the project
    :param igf_session_class: A db session class used for the data authority lookup
    :param irods_exe_dir: Irods executable directory
    :param flowcell_id: Flowcell id passed to the irods uploader
    :param samplesheet_filename: File name of the samplesheet to add to the tar
    :param manifest_name: File name of the manifest to add to the tar
    :param report_html: A glob pattern matched against full report file paths;
                        its basename is taken, so it must be a path string
    :param use_ephemeral_space: Passed to get_temp_dir while creating the work dir
    :raises ValueError: if no data authority user is found for the project
    '''
    seqrun_igf_id = None                                                        # bound before try so the error handler can always format its message
    try:
      fastq_dir = self.param_required('fastq_dir')
      seqrun_igf_id = self.param_required('seqrun_igf_id')
      project_name = self.param_required('project_name')
      igf_session_class = self.param_required('igf_session_class')
      irods_exe_dir = self.param_required('irods_exe_dir')
      flowcell_id = self.param_required('flowcell_id')
      samplesheet_filename = self.param('samplesheet_filename')
      manifest_name = self.param_required('manifest_name')
      report_html = self.param('report_html')
      use_ephemeral_space = self.param('use_ephemeral_space')

      pa = ProjectAdaptor(**{'session_class':igf_session_class})
      pa.start_session()
      try:
        user = \
          pa.fetch_data_authority_for_project(
            project_igf_id=project_name)                                        # fetch user info from db
      finally:
        pa.close_session()                                                      # close session even if the lookup fails

      if user is None:
        raise ValueError('No user found for project {0}'.\
                         format(project_name))

      username = user.username                                                  # get username for irods

      report_htmlname = os.path.basename(report_html)
      seqrun_date = seqrun_igf_id.split('_')[0]                                 # collect seqrun date from igf id
      seqrun_date = datetime.datetime.strptime(seqrun_date,'%y%m%d').date()     # identify actual date
      seqrun_date = str(seqrun_date)                                            # convert object to string
      irods_upload = IGF_irods_uploader(irods_exe_dir)                          # create instance for irods upload
      base_seq_dir = os.path.basename(fastq_dir)                                # get base name for the source dir
      file_prefix = \
        '{0}_{1}_{2}'.\
          format(
            project_name,
            base_seq_dir,
            seqrun_date)                                                        # common prefix for the tarfile and renamed files
      temp_work_dir = \
        get_temp_dir(use_ephemeral_space=use_ephemeral_space)                   # get a temp dir
      tarfile_name = \
        os.path.join(
          temp_work_dir,
          '{0}.tar'.format(file_prefix))                                        # create tarfile in the temp dir

      def add_renamed_copy(tar, source_dir, file_name):
        # copy a file to the temp dir under the prefixed name and add that
        # copy to the tar, stored by its new base name
        source_file = \
          os.path.join(os.path.abspath(source_dir), file_name)
        renamed_file = \
          os.path.join(
            temp_work_dir,
            '{0}_{1}'.format(file_prefix, file_name))
        copy2(source_file, renamed_file)
        tar.add(
          renamed_file,
          arcname=os.path.relpath(renamed_file, start=temp_work_dir))

      with tarfile.open(tarfile_name, "w") as tar:
        for root,_, files in os.walk(top=fastq_dir):
          if samplesheet_filename in files:
            add_renamed_copy(tar, root, samplesheet_filename)                   # add samplesheet file to tar

          if report_htmlname in files:
            for file_name in files:
              if fnmatch.fnmatch(os.path.join(root,file_name),report_html):
                add_renamed_copy(tar, root, file_name)                          # add demultiplexing report to tar

          if manifest_name in files:
            add_renamed_copy(tar, root, manifest_name)                          # add manifest file to tar

          for file_name in files:
            if fnmatch.fnmatch(file_name, '*.fastq.gz') and \
               not fnmatch.fnmatch(file_name, 'Undetermined_*'):
              fastq_file_path = os.path.join(os.path.abspath(root),file_name)   # get filepath for the fastq files
              tar.add(fastq_file_path,
                      arcname=os.path.relpath(fastq_file_path,
                                              start=fastq_dir))                 # add fastq file to tar

      irods_upload.\
      upload_fastqfile_and_create_collection(
        filepath=tarfile_name,
        irods_user=username,
        project_name=project_name,
        run_igf_id=seqrun_igf_id,
        flowcell_id=flowcell_id,
        run_date=seqrun_date)                                                   # upload fastq data to irods
      remove_dir(temp_work_dir)                                                 # remove temp dir once data upload is done; it is kept on failure
    except Exception as e:
      message = \
        'seqrun: {2}, Error in {0}: {1}'.\
        format(
          self.__class__.__name__,
          e,
          seqrun_igf_id)
      self.warning(message)
      self.post_message_to_slack(message,reaction='fail')                       # post msg to slack for failed jobs
      raise
Ejemplo n.º 12
0
  def run(self):
    '''
    A ehive runnable method for rendering an email from a template for the
    data authority user of a project and sending it through sendmail

    :param seqrun_igf_id: Seqrun id, used in the slack messages
    :param project_name: Name of the project
    :param seqrun_date: Run date passed to the template
    :param flowcell_id: Flowcell id passed to the template
    :param igf_session_class: A db session class used for the user lookup
    :param template_dir: Base directory of the templates
    :param email_template_path: Template directory path relative to template_dir
    :param email_template: File name of the email template
    :param sendmail_exe: Sendmail executable, called with '-t'
    :param use_ephemeral_space: Passed to get_temp_dir while creating the work dir
    :raises ValueError: if no data authority user is found for the project
    '''
    seqrun_igf_id = None                                                        # bound before try so the error handler can always format its message
    try:
      seqrun_igf_id = self.param_required('seqrun_igf_id')
      project_name = self.param_required('project_name')
      seqrun_date = self.param_required('seqrun_date')
      flowcell_id = self.param_required('flowcell_id')
      igf_session_class = self.param_required('igf_session_class')
      template_dir = self.param_required('template_dir')
      email_template_path = self.param('email_template_path')
      email_template = self.param('email_template')
      sendmail_exe = self.param('sendmail_exe')
      use_ephemeral_space = self.param('use_ephemeral_space')
      hpcUser = False                                                           # default value for hpc users

      pa = ProjectAdaptor(**{'session_class':igf_session_class})
      pa.start_session()
      try:
        user_info = pa.get_project_user_info(project_igf_id=project_name)       # fetch user info from db
      finally:
        pa.close_session()                                                      # close session even if the lookup fails

      user_info = user_info[user_info['data_authority']=='T']                   # filter dataframe for data authority
      user_info = user_info.to_dict(orient='records')                           # convert dataframe to list of dictionaries
      if len(user_info) == 0:
        raise ValueError('No user found for project {0}'.format(project_name))

      user_info = user_info[0]                                                  # only the first data authority record is used
      user_name = user_info['name']                                             # get name of the user
      login_name = user_info['username']
      user_email = user_info['email_id']
      user_category = user_info['category']
      if user_category=='HPC_USER':
        hpcUser = True                                                          # set value for hpc user
        message = 'loading hpc user specific settings for {0}:{1}'.\
                  format(user_name,login_name)
        self.post_message_to_slack(message,reaction='pass')                     # send message to slack

      email_template_path = \
        os.path.join(
          template_dir,
          email_template_path)
      template_env = \
        Environment(
          loader=FileSystemLoader(
                   searchpath=email_template_path),
          autoescape=select_autoescape(['html','xml']))                         # set template env
      template_file = template_env.get_template(email_template)
      temp_work_dir = \
        get_temp_dir(use_ephemeral_space=use_ephemeral_space)                   # get a temp dir
      report_output_file = \
        os.path.join(
          temp_work_dir,
          email_template)
      template_file.\
        stream(
          projectName=project_name,
          customerEmail=user_email,
          customerName=user_name,
          customerUsername=login_name,
          projectRunDate=seqrun_date,
          flowcellId=flowcell_id,
          hpcUser=hpcUser).\
        dump(report_output_file)
      sendmail_cmd = \
        [sendmail_exe,
         '-t',
        ]
      with open(report_output_file, 'rb') as mail_body:                         # feed the rendered mail on stdin, no extra 'cat' child to reap
        subprocess.\
          check_call(
            sendmail_cmd,
            stdin=mail_body)
      remove_dir(temp_work_dir)
      message = \
        'finished data processing for seqrun: {0}, project: {1}, sent mail to igf'.\
        format(seqrun_igf_id, project_name)
      self.post_message_to_slack(message,reaction='pass')
    except Exception as e:
      message = \
        'seqrun: {2}, Error in {0}: {1}'.\
         format(
           self.__class__.__name__,
           e,
           seqrun_igf_id)
      self.warning(message)
      self.post_message_to_slack(message,reaction='fail')                       # post msg to slack for failed jobs
      raise
Ejemplo n.º 13
0
    def setUp(self):
        '''
        Create the db tables and load the fixture records: three platforms with
        their flowcell barcode rules, one project, seven samples, one seqrun,
        and one experiment plus one run (with R1_READ_COUNT) for six of the samples
        '''
        self.dbconfig = 'data/dbconfig.json'
        dbparam = read_dbconf_json(self.dbconfig)
        base = BaseAdaptor(**dbparam)
        self.engine = base.engine
        self.dbname = dbparam['dbname']
        Base.metadata.create_all(self.engine)
        self.session_class = base.get_session_class()

        # values shared by the fixture records below
        project_igf_id = 'IGFQ000472_avik_28-3-2018_RNA'
        platform_name = 'HISEQ4000'
        seqrun_igf_id = '180518_K00345_0047_BHV2GJBBXX'
        sequenced_sample_ids = [
            'IGF109792',
            'IGF109793',
            'IGF109794',
            'IGF109795',
            'IGF109796',
            'IGF109797',
        ]  # samples which get an experiment and a run
        r1_read_counts = [
            288046541,
            14666330,
            5009143,
            1391747,
            1318008,
            1216324,
        ]  # one count per entry of sequenced_sample_ids, same order

        # load platform data
        platform_data = [
            {"platform_igf_id": "M03291",
             "model_name": "MISEQ",
             "vendor_name": "ILLUMINA",
             "software_name": "RTA",
             "software_version": "RTA1.18.54"},
            {"platform_igf_id": "NB501820",
             "model_name": "NEXTSEQ",
             "vendor_name": "ILLUMINA",
             "software_name": "RTA",
             "software_version": "RTA2"},
            {"platform_igf_id": "K00345",
             "model_name": "HISEQ4000",
             "vendor_name": "ILLUMINA",
             "software_name": "RTA",
             "software_version": "RTA2"},
        ]

        flowcell_rule_data = [
            {"platform_igf_id": "K00345",
             "flowcell_type": "HiSeq 3000/4000 SR",
             "index_1": "NO_CHANGE",
             "index_2": "NO_CHANGE"},
            {"platform_igf_id": "K00345",
             "flowcell_type": "HiSeq 3000/4000 PE",
             "index_1": "NO_CHANGE",
             "index_2": "REVCOMP"},
            {"platform_igf_id": "NB501820",
             "flowcell_type": "NEXTSEQ",
             "index_1": "NO_CHANGE",
             "index_2": "REVCOMP"},
            {"platform_igf_id": "M03291",
             "flowcell_type": "MISEQ",
             "index_1": "NO_CHANGE",
             "index_2": "NO_CHANGE"},
        ]

        pl = PlatformAdaptor(**{'session_class': base.session_class})
        pl.start_session()
        pl.store_platform_data(data=platform_data)
        pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
        pl.close_session()

        # load project data

        project_data = [{'project_igf_id': project_igf_id}]
        pa = ProjectAdaptor(**{'session_class': base.session_class})
        pa.start_session()
        pa.store_project_and_attribute_data(data=project_data)
        pa.close_session()

        # load samples, including one sample without experiment or run

        sample_data = [
            {
                'project_igf_id': project_igf_id,
                'sample_igf_id': sample_igf_id,
                'expected_read': 40000000
            }
            for sample_igf_id in sequenced_sample_ids + ['IGF109797_1']
        ]

        sa = SampleAdaptor(**{'session_class': base.session_class})
        sa.start_session()
        sa.store_sample_and_attribute_data(data=sample_data)
        sa.close_session()

        # load seqrun data

        seqrun_data = [{
            'flowcell_id': 'HV2GJBBXX',
            'platform_igf_id': 'K00345',
            'seqrun_igf_id': seqrun_igf_id
        }]

        sra = SeqrunAdaptor(**{'session_class': base.session_class})
        sra.start_session()
        sra.store_seqrun_and_attribute_data(data=seqrun_data)
        sra.close_session()

        # load experiment data

        experiment_data = [
            {
                'experiment_igf_id': '{0}_{1}'.format(sample_igf_id, platform_name),
                'library_name': sample_igf_id,
                'platform_name': platform_name,
                'project_igf_id': project_igf_id,
                'sample_igf_id': sample_igf_id,
            }
            for sample_igf_id in sequenced_sample_ids
        ]

        ea = ExperimentAdaptor(**{'session_class': base.session_class})
        ea.start_session()
        ea.store_project_and_attribute_data(data=experiment_data)
        ea.close_session()

        # load run data; every run id is built the same way, which also drops
        # the stray leading TAB one of the hand-written ids used to carry

        run_data = [
            {
                'experiment_igf_id': '{0}_{1}'.format(sample_igf_id, platform_name),
                'lane_number': '7',
                'run_igf_id': '{0}_{1}_H2N3MBBXY_7'.format(sample_igf_id, platform_name),
                'seqrun_igf_id': seqrun_igf_id,
                'R1_READ_COUNT': read_count
            }
            for sample_igf_id, read_count in zip(sequenced_sample_ids, r1_read_counts)
        ]

        ra = RunAdaptor(**{'session_class': base.session_class})
        ra.start_session()
        ra.store_run_and_attribute_data(data=run_data)
        ra.close_session()
    def run(self):
        '''
        A runnable method for rendering the access control files and the
        project web pages of a project and copying them to the remote
        project directory

        All inputs are read with ``self.param_required`` / ``self.param``.
        On success the param ``dataflow_params`` is set to
        ``{'remote_dir_status': 'done'}`` and the temp work dir is removed.

        :param seqrun_igf_id: Required, seqrun id, only used in the failure message
        :param project_name: Required, project igf id; used for the db lookups,
                             as a template value and as the remote directory name
        :param remote_project_path: Required, remote base path, the files are copied
                                    under remote_project_path/project_name
        :param remote_user: Required, passed to ``_check_and_copy_remote_file``
        :param remote_host: Required, passed to ``_check_and_copy_remote_file``
        :param template_dir: Required, base directory which is joined with every template path
        :param igf_session_class: Required, session class for ``ProjectAdaptor``
        :param htaccess_template_path: Directory (relative to template_dir) holding
                                       the htaccess and htpasswd templates
        :param htaccess_template: Name of the htaccess template
        :param htpasswd_template: Name of the htpasswd template
        :param htaccess_filename: Output name of the rendered htaccess file
        :param htpasswd_filename: Output name of the rendered htpasswd file
        :param project_template: Project page template, relative to template_dir
        :param status_template: Status page template, relative to template_dir
        :param analysis_template: Analysis page template, relative to template_dir
        :param analysis_viewer_template: Analysis viewer page template, relative to template_dir
        :param seqruninfofile: Template value for the project page
        :param samplereadcountfile: Template value for the project page
        :param samplereadcountcsvfile: Template value for the project page
        :param status_data_json: Template value for the status page
        :param analysis_data_json: Template value for the analysis page
        :param analysis_data_csv: Template value for the analysis page
        :param analysis_chart_data_csv: Template value for the analysis page
        :param analysis_chart_data_json: Template value for the analysis page
        :param analysis_view_js: Template value for the analysis viewer page
        :param image_height: Base image height, passed to ``_calculate_image_height``
        :param sample_count_threshold: Threshold passed to ``_calculate_image_height``
        :param use_ephemeral_space: Passed to ``get_temp_dir``
        :raises ValueError: If no user record is found for the project
        '''
        # bind before the try block, otherwise a failure while reading the
        # params raises UnboundLocalError from the except handler and hides
        # the original error
        seqrun_igf_id = None
        try:
            seqrun_igf_id = self.param_required('seqrun_igf_id')
            project_name = self.param_required('project_name')
            remote_project_path = self.param_required('remote_project_path')
            remote_user = self.param_required('remote_user')
            remote_host = self.param_required('remote_host')
            template_dir = self.param_required('template_dir')
            igf_session_class = self.param_required('igf_session_class')
            htaccess_template_path = self.param('htaccess_template_path')
            htaccess_template = self.param('htaccess_template')
            htpasswd_template = self.param('htpasswd_template')
            htaccess_filename = self.param('htaccess_filename')
            htpasswd_filename = self.param('htpasswd_filename')
            project_template = self.param('project_template')
            status_template = self.param('status_template')
            analysis_template = self.param('analysis_template')
            analysis_viewer_template = self.param('analysis_viewer_template')
            seqruninfofile = self.param('seqruninfofile')
            samplereadcountfile = self.param('samplereadcountfile')
            samplereadcountcsvfile = self.param('samplereadcountcsvfile')
            status_data_json = self.param('status_data_json')
            analysis_data_json = self.param('analysis_data_json')
            analysis_data_csv = self.param('analysis_data_csv')
            analysis_chart_data_csv = self.param('analysis_chart_data_csv')
            analysis_chart_data_json = self.param('analysis_chart_data_json')
            analysis_view_js = self.param('analysis_view_js')
            image_height = self.param('image_height')
            sample_count_threshold = self.param('sample_count_threshold')
            use_ephemeral_space = self.param('use_ephemeral_space')

            # resolve every template location against the template dir
            htaccess_template_path = \
              os.path.join(template_dir, htaccess_template_path)
            project_template_path = \
              os.path.join(template_dir, project_template)
            status_template_path = \
              os.path.join(template_dir, status_template)
            analysis_template_path = \
              os.path.join(template_dir, analysis_template)
            analysis_viewer_template = \
              os.path.join(template_dir, analysis_viewer_template)

            pa = ProjectAdaptor(**{'session_class': igf_session_class})
            pa.start_session()
            try:
                user_info = \
                  pa.get_project_user_info(project_igf_id=project_name)               # fetch user info from db
                sample_counts = \
                  pa.count_project_samples(
                    project_igf_id=project_name,
                    only_active=True)                                                 # get sample counts for the project
            finally:
                pa.close_session()                                                    # always release the db session

            image_height = \
              self._calculate_image_height(
                sample_count=sample_counts,
                height=image_height,
                threshold=sample_count_threshold)                                     # change image height based on sample count

            user_info = \
              user_info.to_dict(orient='records')                                     # convert dataframe to list of dictionaries
            if not user_info:
                raise ValueError('No user found for project {0}'.\
                                 format(project_name))

            user_list = list()
            user_passwd_dict = dict()
            hpc_user = True                                                           # by default, load hpc user settings
            for user in user_info:
                username = user['username']                                           # get username for irods
                user_list.append(username)
                if 'ht_password' in user:
                    user_passwd_dict[username] = user['ht_password']                  # get htaccess passwd

                if user.get('category') == 'NON_HPC_USER' and \
                   user.get('data_authority') == 'T':
                    hpc_user = False                                                  # switch to non-hpc settings if primary user is non-hpc

            temp_work_dir = \
              get_temp_dir(use_ephemeral_space=use_ephemeral_space)                   # get a temp dir

            # htaccess and htpasswd templates share one template environment
            template_env = \
              Environment(
                loader=FileSystemLoader(searchpath=htaccess_template_path),
                autoescape=select_autoescape(['html', 'xml']))
            htaccess = template_env.get_template(htaccess_template)                   # read htaccess template
            htpasswd = template_env.get_template(htpasswd_template)                   # read htpasswd template
            htaccess_output = os.path.join(temp_work_dir, htaccess_filename)
            htpasswd_output = os.path.join(temp_work_dir, htpasswd_filename)
            htaccess.\
            stream(
              remote_project_dir=remote_project_path,
              project_tag=project_name,
              hpcUser=hpc_user,
              htpasswd_filename=htpasswd_filename,
              customerUsernameList=' '.join(user_list)).\
            dump(htaccess_output)                                                     # write new htaccess file
            htpasswd.\
            stream(userDict=user_passwd_dict).\
            dump(htpasswd_output)                                                     # write new htpasswd file

            # list of (local file, file name under the remote project dir),
            # kept in the order in which the files have to be copied
            files_to_copy = [
              (htaccess_output, htaccess_filename),
              (htpasswd_output, htpasswd_filename)]

            # page templates and their template values, in rendering order
            page_templates = [
              (project_template_path,
               {'ProjectName': project_name,
                'seqrunInfoFile': seqruninfofile,
                'sampleReadCountFile': samplereadcountfile,
                'sampleReadCountCsvFile': samplereadcountcsvfile,
                'ImageHeight': image_height}),                                        # project page
              (status_template_path,
               {'ProjectName': project_name,
                'status_data_json': status_data_json}),                               # project status page
              (analysis_template_path,
               {'ProjectName': project_name,
                'analysisInfoFile': analysis_data_json,
                'analysisInfoCsvFile': analysis_data_csv,
                'analysisCsvDataFile': analysis_chart_data_csv,
                'analysisPlotFile': analysis_chart_data_json}),                       # project analysis page
              (analysis_viewer_template,
               {'ProjectName': project_name,
                'analysisJsFile': analysis_view_js})]                                 # project analysis viewer page

            for page_template_path, page_values in page_templates:
                page_template_name = os.path.basename(page_template_path)
                page_env = \
                  Environment(
                    loader=FileSystemLoader(
                             searchpath=os.path.dirname(page_template_path)),
                    autoescape=select_autoescape(['txt', 'xml']))                     # each page gets its own template env
                page_output = \
                  os.path.join(temp_work_dir, page_template_name)                     # output keeps the template file name
                page_env.\
                get_template(page_template_name).\
                stream(**page_values).\
                dump(page_output)                                                     # write new page file
                files_to_copy.append(
                  (page_output, os.path.basename(page_output)))

            remote_project_dir = \
              os.path.join(remote_project_path, project_name)                         # get remote project dir path
            for source_file, remote_name in files_to_copy:
                self._check_and_copy_remote_file(
                  remote_user=remote_user,
                  remote_host=remote_host,
                  source_file=source_file,
                  remote_file=os.path.join(remote_project_dir, remote_name))          # copy rendered file to remote project dir

            self.param('dataflow_params', {'remote_dir_status': 'done'})
            remove_dir(temp_work_dir)
        except Exception as e:
            message = \
              'seqrun: {2}, Error in {0}: {1}'.\
              format(
                self.__class__.__name__,
                e,
                seqrun_igf_id)
            self.warning(message)
            self.post_message_to_slack(
                message, reaction='fail')                                             # post msg to slack for failed jobs
            raise