def test_load_file_to_disk_and_db7(self):
     # Load every file from self.input_list into one 'run' collection, keeping
     # the existing collection in place, then check that the renamed cram path
     # is present both in the db collection and in the loader's return value.
     au = Analysis_collection_utils(dbsession_class=self.session_class,
                                    analysis_name='AnalysisA',
                                    tag_name='TagA',
                                    collection_name='RunA',
                                    collection_type='AnalysisA_Files',
                                    collection_table='run',
                                    base_path=self.temp_base_dir)
     input_file_list = [
         os.path.join(self.temp_work_dir, file_name)
         for file_name in self.input_list
     ]
     output_list = au.load_file_to_disk_and_db(
         input_file_list=input_file_list,
         withdraw_exisitng_collection=False
     )  # loading all files to same collection
     base = BaseAdaptor(**{'session_class': self.session_class})
     base.start_session()
     try:
         ca = CollectionAdaptor(**{'session': base.session})
         ca_files = ca.get_collection_files(
             collection_name='RunA',
             collection_type='AnalysisA_Files',
             output_mode='dataframe')
         file_list = list(ca_files['file_path'].to_dict().values())
         # expected layout for a 'run' collection:
         # base/project/sample/experiment/run/analysis/<renamed file>
         datestamp = get_datestamp_label()
         test_file = os.path.join(
             self.temp_base_dir, 'ProjectA', 'SampleA', 'ExperimentA', 'RunA',
             'AnalysisA', '{0}_{1}_{2}_{3}.{4}'.format('RunA', 'AnalysisA',
                                                       'TagA', datestamp,
                                                       'cram'))
         test_file = preprocess_path_name(input_path=test_file)
         # assertIn reports both operands when the check fails
         self.assertIn(test_file, file_list)
         self.assertIn(test_file, output_list)
     finally:
         base.close_session()  # release the session even if an assertion fails
 def test_load_file_to_disk_and_db8(self):
     # get_new_file_name should build the name from the collection name,
     # analysis name, tag, current datestamp label and the input extension.
     collection_utils = Analysis_collection_utils(
         dbsession_class=self.session_class,
         analysis_name='AnalysisA',
         tag_name='TagA',
         collection_name='RunA',
         collection_type='AnalysisA_Files',
         collection_table='run')
     cram_path = preprocess_path_name(
         input_path=os.path.join(self.temp_work_dir, 'a.cram'))
     renamed = collection_utils.get_new_file_name(input_file=cram_path)
     # the datestamp is read after the rename call, as the expected value
     name_fields = ('RunA', 'AnalysisA', 'TagA', get_datestamp_label(), 'cram')
     expected_name = '{0}_{1}_{2}_{3}.{4}'.format(*name_fields)
     self.assertEqual(renamed, expected_name)
    def load_file_to_disk_and_db(self,
                                 input_file_list,
                                 withdraw_exisitng_collection=True,
                                 autosave_db=True,
                                 file_suffix=None,
                                 force=True,
                                 remove_file=False):
        '''
        A method for loading analysis results to disk and database.

        Each input file is given a final path, moved there when that path
        differs from its current one, and then registered through
        create_or_update_analysis_collection.

        If base_path is None the file stays in its own directory (it is still
        renamed in place when rename_file is set). If base_path is present the
        final directory depends on collection_table:

        project - base_path/project_igf_id/analysis_name
        sample - base_path/project_igf_id/sample_igf_id/analysis_name
        experiment - base_path/project_igf_id/sample_igf_id/experiment_igf_id/analysis_name
        run - base_path/project_igf_id/sample_igf_id/experiment_igf_id/run_igf_id/analysis_name

        :param input_file_list: A list of input files to load, all using the same collection info
        :param withdraw_exisitng_collection: Passed to create_or_update_analysis_collection,
                                             default True. Remove existing collection group,
                                             DO NOT use this while loading a list of files
        :param autosave_db: Passed to create_or_update_analysis_collection and, when True,
                            commits after each file, default True. A final commit is
                            issued before the session is closed regardless of this flag
        :param file_suffix: Passed to get_new_file_name when rename_file is set; use None
                            if it should be same as original file,
                            e.g. input.vcf.gz to  output.vcf.gz
        :param force: Passed to move_file when a file has to be moved, default True
        :param remove_file: Passed to create_or_update_analysis_collection, default False
        :returns: A list of final filepath, in the order of input_file_list
        :raises ValueError: If collection information is incomplete, if the collection
                            record or one of its parent ids can't be found while
                            base_path is set, or if rename_file is set without an
                            analysis name
        '''
        dbconnected = False  # tells the error handler whether a session is open
        try:
            if self.collection_name is None or \
               self.collection_type is None or \
               self.collection_table is None:
                raise ValueError('File collection information is incomplete'
                                 )  # check for collection information

            project_igf_id = None
            sample_igf_id = None
            experiment_igf_id = None
            run_igf_id = None
            output_path_list = []  # define empty output list

            base = BaseAdaptor(**{'session_class': self.dbsession_class})
            base.start_session()  # connect to db
            dbconnected = True
            if self.base_path is not None:
                # parent ids are only needed for building the target directory
                if self.collection_table == 'sample':
                    sa = SampleAdaptor(**{'session': base.session})
                    sample_igf_id = self.collection_name
                    sample_exists = sa.check_sample_records_igf_id(
                        sample_igf_id=sample_igf_id)
                    if not sample_exists:
                        raise ValueError('Sample {0} not found in db'.
                                         format(sample_igf_id))

                    project_igf_id = \
                        sa.fetch_sample_project(
                            sample_igf_id=sample_igf_id)  # fetch project id for sample
                elif self.collection_table == 'experiment':
                    ea = ExperimentAdaptor(**{'session': base.session})
                    experiment_igf_id = self.collection_name
                    experiment_exists = \
                        ea.check_experiment_records_id(
                            experiment_igf_id=experiment_igf_id)
                    if not experiment_exists:
                        raise ValueError('Experiment {0} not present in database'.
                                         format(experiment_igf_id))

                    (project_igf_id, sample_igf_id) = \
                        ea.fetch_project_and_sample_for_experiment(
                            experiment_igf_id=experiment_igf_id)  # fetch project and sample id for experiment
                elif self.collection_table == 'run':
                    ra = RunAdaptor(**{'session': base.session})
                    run_igf_id = self.collection_name
                    run_exists = ra.check_run_records_igf_id(
                        run_igf_id=run_igf_id)
                    if not run_exists:
                        raise ValueError('Run {0} not found in database'.
                                         format(run_igf_id))

                    (project_igf_id, sample_igf_id, experiment_igf_id) = \
                        ra.fetch_project_sample_and_experiment_for_run(
                            run_igf_id=run_igf_id)  # fetch project, sample and experiment id for run
                elif self.collection_table == 'project':
                    pa = ProjectAdaptor(**{'session': base.session})
                    project_igf_id = self.collection_name
                    project_exists = \
                        pa.check_project_records_igf_id(
                            project_igf_id=project_igf_id)
                    if not project_exists:
                        raise ValueError('Project {0} not found in database'.
                                         format(project_igf_id))

            if self.rename_file and self.analysis_name is None:
                raise ValueError('Analysis name is required for renaming file'
                                 )  # check analysis name

            # For each supported table: the ids forming the target directory
            # (in path order) and the error raised if any of them is missing.
            path_ids_by_table = {
                'project': (
                    (project_igf_id,),
                    'Missing project id for collection {0}'),
                'sample': (
                    (project_igf_id, sample_igf_id),
                    'Missing project and sample id for collection {0}'),
                'experiment': (
                    (project_igf_id, sample_igf_id, experiment_igf_id),
                    'Missing project,sample and experiment id for collection {0}'),
                'run': (
                    (project_igf_id, sample_igf_id, experiment_igf_id,
                     run_igf_id),
                    'Missing project,sample,experiment and run id for collection {0}'),
            }

            for input_file in input_file_list:
                if self.base_path is None:  # do not change dir if base_path is absent
                    target_dir = os.path.dirname(input_file)
                elif self.collection_table in path_ids_by_table:
                    (path_ids, missing_id_message) = \
                        path_ids_by_table[self.collection_table]
                    if any(igf_id is None for igf_id in path_ids):
                        raise ValueError(
                            missing_id_message.format(self.collection_name))

                    target_dir = os.path.join(
                        *([self.base_path] +
                          list(path_ids) +
                          [self.analysis_name]))
                else:
                    # NOTE(review): an unrecognised collection_table leaves the
                    # target directory empty, so the file ends up at a path
                    # relative to the working directory - confirm whether
                    # this case should raise instead.
                    target_dir = ''

                if self.rename_file:
                    target_name = self.get_new_file_name(
                        input_file=input_file,
                        file_suffix=file_suffix)  # get new file name
                else:
                    target_name = os.path.basename(input_file)

                final_path = os.path.join(target_dir, target_name)
                if final_path != input_file:  # move file if its required
                    final_path = preprocess_path_name(
                        input_path=final_path
                    )  # remove unexpected characters from file path
                    move_file(source_path=input_file,
                              destinationa_path=final_path,
                              force=force
                              )  # move or overwrite file to destination dir

                output_path_list.append(
                    final_path)  # add final path to the output list
                self.create_or_update_analysis_collection(
                    file_path=final_path,
                    dbsession=base.session,
                    withdraw_exisitng_collection=withdraw_exisitng_collection,
                    remove_file=remove_file,
                    autosave_db=autosave_db)  # load new file collection in db
                if autosave_db:
                    base.commit_session()  # save changes to db for each file

            # NOTE(review): this commit runs even when autosave_db is False -
            # confirm that is the intended contract.
            base.commit_session()  # save changes to db
            base.close_session()  # close db connection
            return output_path_list
        except:
            # cleanup only; the original error is always re-raised
            if dbconnected:
                try:
                    base.rollback_session()
                finally:
                    base.close_session()  # close even if the rollback fails
            raise