def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()

def __init__(self, projet_info_path, dbconfig, user_account_template,
             log_slack=True, slack_config=None, check_hpc_user=False,
             hpc_user=None, hpc_address=None, ldap_server=None,
             setup_irods=True, notify_user=True,
             default_user_email='*****@*****.**',
             project_lookup_column='project_igf_id',
             user_lookup_column='email_id',
             data_authority_column='data_authority',
             sample_lookup_column='sample_igf_id',
             barcode_check_keyword='barcode_check',
             metadata_sheet_name='Project metadata',
             sendmail_exe='/usr/sbin/sendmail'):
    try:
        self.projet_info_path = projet_info_path
        self.user_account_template = user_account_template
        self.project_lookup_column = project_lookup_column
        self.user_lookup_column = user_lookup_column
        self.sample_lookup_column = sample_lookup_column
        self.data_authority_column = data_authority_column
        self.log_slack = log_slack
        dbparams = read_dbconf_json(dbconfig)
        base = BaseAdaptor(**dbparams)
        self.session_class = base.get_session_class()
        self.setup_irods = setup_irods
        self.notify_user = notify_user
        self.default_user_email = default_user_email
        self.barcode_check_keyword = barcode_check_keyword
        self.check_hpc_user = check_hpc_user
        self.hpc_user = hpc_user
        self.hpc_address = hpc_address
        self.ldap_server = ldap_server
        self.metadata_sheet_name = metadata_sheet_name
        self.sendmail_exe = sendmail_exe
        if log_slack and slack_config is None:
            raise ValueError('Missing slack config file')
        elif log_slack and slack_config:
            self.igf_slack = IGF_slack(slack_config=slack_config)
        if check_hpc_user and (hpc_user is None or
                               hpc_address is None or
                               ldap_server is None):
            raise ValueError(
                'Hpc user {0}, address {1} and ldap server {2} are '
                'required for check_hpc_user'.format(
                    hpc_user, hpc_address, ldap_server))
    except:
        raise

def setUp(self):
    self.data_file = 'data/platform.json'
    self.dbconfig = 'data/dbconfig.json'
    dbparam = None
    with open(self.dbconfig, 'r') as json_data:
        dbparam = json.load(json_data)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    self.pipeline_name = ''
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()

def test_reset_pipeline_seed_for_rerun(self):
    base = BaseAdaptor(**{'session_class': self.session_class})
    base.start_session()
    sra = SeqrunAdaptor(**{'session': base.session})
    seqrun = sra.fetch_seqrun_records_igf_id(
        seqrun_igf_id='171003_M00001_0089_000000000-TEST')
    pp = PipelineAdaptor(**{'session': base.session})
    pipeline = pp.fetch_pipeline_records_pipeline_name('demultiplexing_fastq')
    pipe_seed = pp.fetch_pipeline_seed(
        pipeline_id=pipeline.pipeline_id,
        seed_id=seqrun.seqrun_id,
        seed_table='seqrun')
    self.assertEqual(pipe_seed.status, 'SEEDED')
    pp.update_pipeline_seed(data=[{
        'pipeline_id': pipeline.pipeline_id,
        'seed_id': seqrun.seqrun_id,
        'seed_table': 'seqrun',
        'status': 'FINISHED'}])
    pipe_seed2 = pp.fetch_pipeline_seed(
        pipeline_id=pipeline.pipeline_id,
        seed_id=seqrun.seqrun_id,
        seed_table='seqrun')
    self.assertEqual(pipe_seed2.status, 'FINISHED')
    base.close_session()
    with open(self.seqrun_input_list, 'w') as fp:
        fp.write('171003_M00001_0089_000000000-TEST')
    mps = Modify_pipeline_seed(
        igf_id_list=self.seqrun_input_list,
        table_name='seqrun',
        pipeline_name='demultiplexing_fastq',
        dbconfig_file=self.dbconfig,
        log_slack=False,
        log_asana=False,
        clean_up=True)
    mps.reset_pipeline_seed_for_rerun(seeded_label='SEEDED')
    base.start_session()
    sra = SeqrunAdaptor(**{'session': base.session})
    seqrun = sra.fetch_seqrun_records_igf_id(
        seqrun_igf_id='171003_M00001_0089_000000000-TEST')
    pp = PipelineAdaptor(**{'session': base.session})
    pipeline = pp.fetch_pipeline_records_pipeline_name('demultiplexing_fastq')
    pipe_seed = pp.fetch_pipeline_seed(
        pipeline_id=pipeline.pipeline_id,
        seed_id=seqrun.seqrun_id,
        seed_table='seqrun')
    self.assertEqual(pipe_seed.status, 'SEEDED')
    base.close_session()

def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    self.base = BaseAdaptor(**dbparam)
    self.engine = self.base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)

def test_find_new_analysis_seeds2(self):
    base = BaseAdaptor(**{'session_class': self.session_class})
    project_name_file = os.path.join(self.temp_dir, 'project_name_list.txt')
    with open(project_name_file, 'w') as fp:
        fp.write('projectA')
    available_exps, seeded_exps = find_new_analysis_seeds(
        dbconfig_path=self.dbconfig,
        pipeline_name='PrimaryAnalysis',
        project_name_file=project_name_file,
        species_name_list=['HG38'],
        fastq_type='demultiplexed_fastq',
        library_source_list=['TRANSCRIPTOMIC_SINGLE_CELL'])
    self.assertTrue(available_exps is None)
    self.assertTrue('projectA' in seeded_exps)
    pla = PipelineAdaptor(**{'session_class': self.session_class})
    pla.start_session()
    seeded_data, exp_data = pla.fetch_pipeline_seed_with_table_data(
        pipeline_name='PrimaryAnalysis',
        table_name='experiment',
        status='SEEDED')
    pla.close_session()
    exp_data = exp_data.to_dict(orient='records')
    self.assertEqual(len(exp_data), 1)
    self.assertEqual(exp_data[0]['experiment_igf_id'], 'sampleA_MISEQ')

def __init__(self, igf_session_class, project_igf_id, seqrun_work_day=2,
             analysis_work_day=1, sequencing_resource_name='Sequencing',
             demultiplexing_resource_name='Demultiplexing',
             analysis_resource_name='Primary Analysis',
             task_id_label='task_id', task_name_label='task_name',
             resource_label='resource', dependencies_label='dependencies',
             start_date_label='start_date', end_date_label='end_date',
             duration_label='duration',
             percent_complete_label='percent_complete'):
    self.project_igf_id = project_igf_id
    self.base_adaptor = BaseAdaptor(**{'session_class': igf_session_class})
    self.seqrun_work_day = seqrun_work_day
    self.analysis_work_day = analysis_work_day
    self.sequencing_resource_name = sequencing_resource_name
    self.demultiplexing_resource_name = demultiplexing_resource_name
    self.analysis_resource_name = analysis_resource_name
    self.task_id_label = task_id_label
    self.task_name_label = task_name_label
    self.resource_label = resource_label
    self.start_date_label = start_date_label
    self.end_date_label = end_date_label
    self.duration_label = duration_label
    self.percent_complete_label = percent_complete_label
    self.dependencies_label = dependencies_label

def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    self.platform_json = 'data/platform_db_data.json'
    self.seqrun_json = 'data/seqrun_db_data.json'
    self.pipeline_json = 'data/pipeline_data.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    base.start_session()
    # load platform data
    pl = PlatformAdaptor(**{'session': base.session})
    pl.store_platform_data(data=read_json_data(self.platform_json))
    # load seqrun data
    sra = SeqrunAdaptor(**{'session': base.session})
    sra.store_seqrun_and_attribute_data(data=read_json_data(self.seqrun_json))
    # load pipeline data
    pla = PipelineAdaptor(**{'session': base.session})
    pla.store_pipeline_data(data=read_json_data(self.pipeline_json))
    pipeline_seed_data = [
        {'pipeline_name': 'demultiplexing_fastq',
         'seed_id': '1',
         'seed_table': 'seqrun'}]
    pla.create_pipeline_seed(data=pipeline_seed_data)
    base.close_session()

def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    data = [
        {'project_igf_id': 'IGFP001_test1_24-1-18'},
        {'project_igf_id': 'IGFP002_test1_24-1-18',
         'barcode_check': 'ON'},
        {'project_igf_id': 'IGFP003_test1_24-1-18',
         'barcode_check': 'OFF'}]
    self.data = pd.DataFrame(data)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()

def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    self.platform_data = [
        {'platform_igf_id': 'M03291',
         'model_name': 'MISEQ',
         'vendor_name': 'ILLUMINA',
         'software_name': 'RTA',
         'software_version': 'RTA1.18.54'},
        {'platform_igf_id': 'NB501820',
         'model_name': 'NEXTSEQ',
         'vendor_name': 'ILLUMINA',
         'software_name': 'RTA',
         'software_version': 'RTA2'},
        {'platform_igf_id': 'K00345',
         'model_name': 'HISEQ4000',
         'vendor_name': 'ILLUMINA',
         'software_name': 'RTA',
         'software_version': 'RTA2'}]
    self.flowcell_rule_data = [
        {'platform_igf_id': 'K00345',
         'flowcell_type': 'HiSeq 3000/4000 SR',
         'index_1': 'NO_CHANGE',
         'index_2': 'NO_CHANGE'},
        {'platform_igf_id': 'K00345',
         'flowcell_type': 'HiSeq 3000/4000 PE',
         'index_1': 'NO_CHANGE',
         'index_2': 'REVCOMP'},
        {'platform_igf_id': 'NB501820',
         'flowcell_type': 'NEXTSEQ',
         'index_1': 'NO_CHANGE',
         'index_2': 'REVCOMP'},
        {'platform_igf_id': 'M03291',
         'flowcell_type': 'MISEQ',
         'index_1': 'NO_CHANGE',
         'index_2': 'NO_CHANGE'}]

def _fetch_track_files_with_metadata(self, level='experiment'):
    '''
    An internal method for fetching track files with their metadata

    :param level: Specific level for fetching metadata information, default 'experiment'
    :returns: A pandas dataframe object
    '''
    try:
        if level == 'experiment':
            base = BaseAdaptor(**{'session_class': self.dbsession_class})
            base.start_session()
            query = (
                base.session.query(
                    Project.project_igf_id,
                    Sample.sample_igf_id,
                    Experiment.experiment_igf_id,
                    Experiment.library_source,
                    Experiment.library_strategy,
                    Experiment.experiment_type,
                    Collection.name,
                    Collection.type,
                    File.file_path,
                    Pipeline.pipeline_name,
                    Pipeline_seed.status)
                .join(Sample, Project.project_id == Sample.project_id)
                .join(Experiment, Sample.sample_id == Experiment.sample_id)
                .join(Collection, Collection.name == Experiment.experiment_igf_id)
                .join(Collection_group, Collection.collection_id == Collection_group.collection_id)
                .join(File, File.file_id == Collection_group.file_id)
                .join(Pipeline_seed, Pipeline_seed.seed_id == Experiment.experiment_id)
                .join(Pipeline, Pipeline.pipeline_id == Pipeline_seed.pipeline_id)
                .filter(Sample.status == 'ACTIVE')
                .filter(Experiment.status == 'ACTIVE')
                .filter(Collection.type.in_(self.collection_type_list))
                .filter(Collection.table == self.collection_table)
                .filter(File.status == 'ACTIVE')
                .filter(Pipeline_seed.status == 'FINISHED')
                .filter(Pipeline.pipeline_name == self.pipeline_name)
                .filter(Project.project_igf_id == self.project_igf_id))
            records = base.fetch_records(query=query, output_mode='dataframe')
            base.close_session()
            return records
        else:
            raise ValueError('No support for {0} tracks'.format(level))
    except:
        raise

def test_process_project_data_and_account(self):
    fa = Find_and_register_new_project_data(
        projet_info_path=os.path.join('.', 'data/check_project_data'),
        dbconfig=self.dbconfig,
        user_account_template='template/email_notification/send_new_account_info.txt',
        log_slack=False,
        setup_irods=False,
        notify_user=False,
        check_hpc_user=False)
    fa.process_project_data_and_account()
    dbparam = None
    with open(self.dbconfig, 'r') as json_data:
        dbparam = json.load(json_data)
    base = BaseAdaptor(**dbparam)
    base.start_session()
    pa = ProjectAdaptor(**{'session': base.session})
    project_exists = pa.check_project_records_igf_id(
        project_igf_id='IGFP0002_test_23-5-2017_rna')
    self.assertTrue(project_exists)
    ua = UserAdaptor(**{'session': base.session})
    user_exists = ua.check_user_records_email_id(email_id='*****@*****.**')
    self.assertTrue(user_exists)
    user1 = ua.fetch_user_records_email_id(user_email_id='*****@*****.**')
    self.assertEqual(user1.name, 'User2')
    sa = SampleAdaptor(**{'session': base.session})
    sample_exists = sa.check_sample_records_igf_id(sample_igf_id='IGF00006')
    self.assertTrue(sample_exists)
    project_user_exists = pa.check_existing_project_user(
        project_igf_id='IGFP0002_test_23-5-2017_rna',
        email_id='*****@*****.**')
    self.assertTrue(project_user_exists)
    project_user_exists = pa.check_existing_project_user(
        project_igf_id='IGFP0002_test_23-5-2017_rna',
        email_id='*****@*****.**')
    self.assertTrue(project_user_exists)
    base.close_session()

def test_load_file_to_disk_and_db1(self):
    au = Analysis_collection_utils(
        dbsession_class=self.session_class,
        analysis_name='AnalysisA',
        tag_name='TagA',
        collection_name='ProjectA',
        collection_type='AnalysisA_Files',
        collection_table='project')
    input_file_list = [
        os.path.join(self.temp_work_dir, file_name)
        for file_name in self.input_list]
    output_list = au.load_file_to_disk_and_db(
        input_file_list=input_file_list,
        withdraw_exisitng_collection=False)  # loading all files to same collection
    base = BaseAdaptor(**{'session_class': self.session_class})
    base.start_session()
    ca = CollectionAdaptor(**{'session': base.session})
    ca_files = ca.get_collection_files(
        collection_name='ProjectA',
        collection_type='AnalysisA_Files',
        output_mode='dataframe')
    self.assertEqual(len(ca_files.index), len(self.input_list))  # compare with input list
    self.assertEqual(len(output_list), len(self.input_list))  # compare with output list
    base.close_session()

def test_load_file_to_disk_and_db7(self):
    au = Analysis_collection_utils(
        dbsession_class=self.session_class,
        analysis_name='AnalysisA',
        tag_name='TagA',
        collection_name='RunA',
        collection_type='AnalysisA_Files',
        collection_table='run',
        base_path=self.temp_base_dir)
    input_file_list = [
        os.path.join(self.temp_work_dir, file_name)
        for file_name in self.input_list]
    output_list = au.load_file_to_disk_and_db(
        input_file_list=input_file_list,
        withdraw_exisitng_collection=False)  # loading all files to same collection
    base = BaseAdaptor(**{'session_class': self.session_class})
    base.start_session()
    ca = CollectionAdaptor(**{'session': base.session})
    ca_files = ca.get_collection_files(
        collection_name='RunA',
        collection_type='AnalysisA_Files',
        output_mode='dataframe')
    file_list = list(ca_files['file_path'].to_dict().values())
    datestamp = get_datestamp_label()
    test_file = os.path.join(
        self.temp_base_dir, 'ProjectA', 'SampleA', 'ExperimentA', 'RunA',
        'AnalysisA',
        '{0}_{1}_{2}_{3}.{4}'.format('RunA', 'AnalysisA', 'TagA', datestamp, 'cram'))
    test_file = preprocess_path_name(input_path=test_file)
    self.assertTrue(test_file in file_list)
    self.assertTrue(test_file in output_list)
    base.close_session()

def test_load_file_to_disk_and_db4(self):
    au = Analysis_collection_utils(
        dbsession_class=self.session_class,
        analysis_name='AnalysisA',
        tag_name='TagA',
        collection_name='ProjectA',
        collection_type='AnalysisA_Files',
        collection_table='project',
        rename_file=False)
    input_file_list = [
        os.path.join(self.temp_work_dir, file_name)
        for file_name in self.input_list]
    output_list = au.load_file_to_disk_and_db(
        input_file_list=input_file_list,
        withdraw_exisitng_collection=False)  # loading all files to same collection, without rename
    base = BaseAdaptor(**{'session_class': self.session_class})
    base.start_session()
    ca = CollectionAdaptor(**{'session': base.session})
    ca_files = ca.get_collection_files(
        collection_name='ProjectA',
        collection_type='AnalysisA_Files',
        output_mode='dataframe')
    file_list = list(ca_files['file_path'].to_dict().values())
    self.assertTrue(input_file_list[0] in file_list)
    self.assertTrue(input_file_list[0] in output_list)
    base.close_session()

def test_load_file_to_disk_and_db2(self):
    au = Analysis_collection_utils(
        dbsession_class=self.session_class,
        analysis_name='AnalysisA',
        tag_name='TagA',
        collection_name='ProjectA',
        collection_type='AnalysisA_Files',
        collection_table='project')
    input_file_list = [
        os.path.join(self.temp_work_dir, file_name)
        for file_name in self.input_list]
    output_list = au.load_file_to_disk_and_db(
        input_file_list=input_file_list,
        withdraw_exisitng_collection=True)  # withdrawing existing collection group before loading new
    base = BaseAdaptor(**{'session_class': self.session_class})
    base.start_session()
    ca = CollectionAdaptor(**{'session': base.session})
    ca_files = ca.get_collection_files(
        collection_name='ProjectA',
        collection_type='AnalysisA_Files',
        output_mode='dataframe')
    self.assertEqual(len(ca_files.index), 1)  # check for unique collection group
    fa = FileAdaptor(**{'session': base.session})
    query = fa.session.query(File)
    fa_records = fa.fetch_records(query=query, output_mode='dataframe')
    self.assertEqual(
        len(fa_records['file_path'].to_dict()),
        3)  # check if all files are present although only one collection group exists
    self.assertEqual(len(output_list), 3)
    base.close_session()

def clean_and_rebuild_database(dbconfig):
    '''
    A method for deleting all data in the database and creating empty tables

    :param dbconfig: A json file containing the database connection info
    '''
    try:
        dbparam = read_dbconf_json(dbconfig)
        base = BaseAdaptor(**dbparam)
        Base.metadata.drop_all(base.engine)
        Base.metadata.create_all(base.engine)
    except:
        raise

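# A minimal usage sketch for clean_and_rebuild_database, assuming it is called
# from a standalone maintenance script; the dbconfig path below is an
# illustrative assumption, not taken from the original code.
if __name__ == '__main__':
    # drops every table mapped on Base.metadata and recreates them empty
    clean_and_rebuild_database(dbconfig='data/dbconfig.json')
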
def __init__(self, seqrun_path, seqrun_igf_list, dbconfig_file, clean_up=True,
             json_collection_type='ILLUMINA_BCL_MD5', log_slack=True,
             log_asana=True, slack_config=None, asana_project_id=None,
             asana_config=None, samplesheet_name='SampleSheet.csv'):
    '''
    :param seqrun_path: A directory path for sequencing run home
    :param seqrun_igf_list: A file path listing sequencing runs to reset
    :param dbconfig_file: A file containing the database configuration
    :param clean_up: Clean up input file once it is processed, default True
    :param json_collection_type: A collection type for md5 json file lookup, default ILLUMINA_BCL_MD5
    :param log_slack: A boolean flag for toggling Slack messages, default True
    :param log_asana: A boolean flag for toggling Asana messages, default True
    :param slack_config: A file containing Slack tokens, default None
    :param asana_config: A file containing Asana tokens, default None
    :param asana_project_id: A numeric Asana project id, default None
    :param samplesheet_name: Name of the samplesheet file, default SampleSheet.csv
    '''
    try:
        self.seqrun_path = seqrun_path
        self.seqrun_igf_list = seqrun_igf_list
        self.json_collection_type = json_collection_type
        self.log_slack = log_slack
        self.log_asana = log_asana
        self.clean_up = clean_up
        self.samplesheet_name = samplesheet_name
        dbparams = read_dbconf_json(dbconfig_file)
        self.base_adaptor = BaseAdaptor(**dbparams)
        if log_slack and slack_config is None:
            raise ValueError('Missing slack config file')
        elif log_slack and slack_config:
            self.igf_slack = IGF_slack(slack_config)  # add slack object
        if log_asana and (asana_config is None or asana_project_id is None):
            raise ValueError('Missing asana config file or asana project id')
        elif log_asana and asana_config and asana_project_id:
            self.igf_asana = IGF_asana(asana_config, asana_project_id)  # add asana object
    except:
        raise

def fetch_input(self):
    '''
    Fetch input method for base runnable

    :param dbconfig: A database configuration json file
    :param log_slack: A toggle for writing logs to slack
    :param log_asana: A toggle for writing logs to asana
    '''
    try:
        dbconfig = self.param_required('dbconfig')
        dbparams = read_dbconf_json(dbconfig)
        base = BaseAdaptor(**dbparams)
        session_class = base.get_session_class()
        self.param('igf_session_class', session_class)  # set session class for pipeline
        if self.param('log_slack'):
            slack_config = self.param_required('slack_config')
            igf_slack = IGF_slack(slack_config=slack_config)
            self.param('igf_slack', igf_slack)
    except:
        raise

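# A sketch of the pipeline parameters this fetch_input reads, assuming the
# eHive convention of passing a parameter hash to the runnable; the literal
# values below are illustrative assumptions, only the keys ('dbconfig',
# 'log_slack', 'slack_config') come from the method above.
example_runnable_params = {
    'dbconfig': 'data/dbconfig.json',          # database configuration json
    'log_slack': True,                         # toggles creation of the IGF_slack object
    'slack_config': 'data/slack_config.json',  # required only when log_slack is True
}
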
def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    self.json_file_path = 'data/reset_samplesheet_md5/seqrun1_file_md5.json'
    json_data = pd.DataFrame([
        {'file_md5': '1e7531158974b5a5b7cbb7dde09ac779',
         'seqrun_file_name': 'SampleSheet.csv'},
        {'file_md5': '2b22f945bc9e7e390af5432425783a03',
         'seqrun_file_name': 'RTAConfiguration.xml'}])
    with open(self.json_file_path, 'w') as jp:
        json.dump(json_data.to_dict(orient='records'), jp, indent=4)
    self.initial_json_md5 = calculate_file_checksum(filepath=self.json_file_path)
    self.correct_samplesheet_md5 = '259ed03f2e8c45980de121f7c3a70565'
    self.json_collection_name = 'seqrun1'
    self.json_collection_type = 'ILLUMINA_BCL_MD5'
    self.seqrun_path = 'data/reset_samplesheet_md5'
    self.seqrun_input_list = 'data/reset_samplesheet_md5/seqrun_input_list.txt'
    ca = CollectionAdaptor(**{'session_class': self.session_class})
    ca.start_session()
    data = pd.DataFrame([
        {'name': self.json_collection_name,
         'type': self.json_collection_type,
         'table': 'seqrun',
         'file_path': self.json_file_path}])
    ca.load_file_and_create_collection(data, autosave=True, hasher='md5')
    ca.close_session()
    with open(self.seqrun_input_list, 'w') as fp:
        fp.write(self.json_collection_name)

def __init__(self, dbconfig_file, log_slack=True, slack_config=None):
    '''
    :param dbconfig_file: A database configuration file path
    :param log_slack: A boolean flag for toggling Slack messages, default True
    :param slack_config: A file containing Slack tokens, default None
    '''
    try:
        dbparams = read_dbconf_json(dbconfig_file)
        self.base_adaptor = BaseAdaptor(**dbparams)
        self.log_slack = log_slack
        if log_slack and slack_config is None:
            raise ValueError('Missing slack config file')
        elif log_slack and slack_config:
            self.igf_slack = IGF_slack(slack_config)  # add slack object
    except:
        raise

def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    self.new_project_data = 'data/check_project_data/new_project_data.csv'
    dbparam = None
    with open(self.dbconfig, 'r') as json_data:
        dbparam = json.load(json_data)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    base.start_session()
    ua = UserAdaptor(**{'session': base.session})
    user_data = [
        {'name': 'user1', 'email_id': '*****@*****.**', 'username': '******'},
        {'name': 'igf', 'email_id': '*****@*****.**', 'username': '******'}]
    ua.store_user_data(data=user_data)
    project_data = [
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X'}]
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)
    project_user_data = [
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'email_id': '*****@*****.**',
         'data_authority': True}]
    pa.assign_user_to_project(data=project_user_data)
    sample_data = [
        {'sample_igf_id': 'IGF00001',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna'},
        {'sample_igf_id': 'IGF00002',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna'},
        {'sample_igf_id': 'IGF00003',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna'},
        {'sample_igf_id': 'IGF00004',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna'},
        {'sample_igf_id': 'IGF00005',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna'}]
    sa = SampleAdaptor(**{'session': base.session})
    sa.store_sample_and_attribute_data(data=sample_data)
    base.close_session()
    new_project_data = [
        {'project_igf_id': 'IGFP0002_test_23-5-2017_rna',
         'name': 'user2',
         'email_id': '*****@*****.**',
         'sample_igf_id': 'IGF00006'},
        {'project_igf_id': 'IGFP0003_test_24-8-2017_rna',
         'name': 'user2',
         'email_id': '*****@*****.**',
         'sample_igf_id': 'IGF00007',
         'barcode_check': 'OFF'}]
    pd.DataFrame(new_project_data).to_csv(
        os.path.join('.', self.new_project_data))

def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    project_data = [
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X'},
        {'project_igf_id': 'IGFP0002_test_22-8-2017_rna',
         'project_name': 'test_23-8-2017_rna',
         'description': 'Its project 2',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X'}]
    base.start_session()
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)
    sa = SampleAdaptor(**{'session': base.session})
    sample_data = [
        {'sample_igf_id': 'IGFS001',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna'},
        {'sample_igf_id': 'IGFS002',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna'},
        {'sample_igf_id': 'IGFS003',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna'},
        {'sample_igf_id': 'IGFS004',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'status': 'FAILED'}]
    sa.store_sample_and_attribute_data(data=sample_data)
    base.close_session()

def test_check_existing_data(self):
    fa = Find_and_register_new_project_data(
        projet_info_path=os.path.join('.', 'data/check_project_data'),
        dbconfig=self.dbconfig,
        user_account_template='template/email_notification/send_new_account_info.txt',
        log_slack=False,
        check_hpc_user=False)
    project_data1 = pd.DataFrame([
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna'},
        {'project_igf_id': 'IGFP0002_test_23-5-2017_rna'}])
    base = BaseAdaptor(**{'session_class': self.session_class})
    base.start_session()
    project_data1 = project_data1.apply(
        lambda x: fa._check_existing_data(
            data=x,
            dbsession=base.session,
            table_name='project'),
        axis=1)
    project_data1 = project_data1[project_data1['EXISTS'] == False].to_dict(orient='records')
    self.assertEqual(project_data1[0]['project_igf_id'], 'IGFP0002_test_23-5-2017_rna')
    user_data1 = pd.DataFrame([
        {'name': 'user1', 'email_id': '*****@*****.**'},
        {'name': 'user3', 'email_id': '*****@*****.**'}])
    user_data1 = user_data1.apply(
        lambda x: fa._check_existing_data(
            data=x,
            dbsession=base.session,
            table_name='user'),
        axis=1)
    user_data1 = user_data1[user_data1['EXISTS'] == False].to_dict(orient='records')
    self.assertEqual(user_data1[0]['email_id'], '*****@*****.**')
    sample_data1 = pd.DataFrame([
        {'sample_igf_id': 'IGF00001',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna'},
        {'sample_igf_id': 'IGF00007',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna'}])
    sample_data1 = sample_data1.apply(
        lambda x: fa._check_existing_data(
            data=x,
            dbsession=base.session,
            table_name='sample'),
        axis=1)
    sample_data1 = sample_data1[sample_data1['EXISTS'] == False].to_dict(orient='records')
    self.assertEqual(sample_data1[0]['sample_igf_id'], 'IGF00007')
    project_user_data1 = pd.DataFrame([
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'email_id': '*****@*****.**'},
        {'project_igf_id': 'IGFP0002_test_23-5-2017_rna',
         'email_id': '*****@*****.**'}])
    project_user_data1 = project_user_data1.apply(
        lambda x: fa._check_existing_data(
            data=x,
            dbsession=base.session,
            table_name='project_user'),
        axis=1)
    project_user_data1 = project_user_data1[project_user_data1['EXISTS'] == False].to_dict(orient='records')
    self.assertEqual(project_user_data1[0]['project_igf_id'], 'IGFP0002_test_23-5-2017_rna')
    base.close_session()

def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    project_data = [
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X'},
        {'project_igf_id': 'IGFP0002_test_22-8-2017_rna',
         'project_name': 'test_23-8-2017_rna',
         'description': 'Its project 2',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X'}]
    user_data = [
        {'name': 'UserA',
         'email_id': '*****@*****.**',
         'username': '******'}]
    project_user_data = [
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'email_id': '*****@*****.**',
         'data_authority': True},
        {'project_igf_id': 'IGFP0002_test_22-8-2017_rna',
         'email_id': '*****@*****.**'}]
    base.start_session()
    ua = UserAdaptor(**{'session': base.session})
    ua.store_user_data(data=user_data)
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)
    pa.assign_user_to_project(data=project_user_data)
    base.close_session()

def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    self.platform_json = 'data/platform_db_data.json'
    self.seqrun_json = 'data/seqrun_db_data.json'
    self.pipeline_json = 'data/pipeline_data.json'
    self.flowcell_rules_json = 'data/flowcell_rules.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    base.start_session()
    # load platform data
    pl = PlatformAdaptor(**{'session': base.session})
    pl.store_platform_data(data=read_json_data(self.platform_json))
    pl.store_flowcell_barcode_rule(data=read_json_data(self.flowcell_rules_json))
    # load seqrun data
    sra = SeqrunAdaptor(**{'session': base.session})
    sra.store_seqrun_and_attribute_data(data=read_json_data(self.seqrun_json))
    base.close_session()

def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.drop_all(self.engine)
    if os.path.exists(self.dbname):
        os.remove(self.dbname)
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    base.start_session()
    project_data = [{'project_igf_id': 'ProjectA'}]
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)  # load project data
    sample_data = [
        {'sample_igf_id': 'SampleA',
         'project_igf_id': 'ProjectA'}]  # sample data
    sa = SampleAdaptor(**{'session': base.session})
    sa.store_sample_and_attribute_data(data=sample_data)  # store sample data
    experiment_data = [
        {'experiment_igf_id': 'ExperimentA',
         'sample_igf_id': 'SampleA',
         'library_name': 'SampleA',
         'platform_name': 'MISEQ',
         'project_igf_id': 'ProjectA'}]  # experiment data
    ea = ExperimentAdaptor(**{'session': base.session})
    ea.store_project_and_attribute_data(data=experiment_data)
    self.temp_dir = get_temp_dir()
    temp_files = ['a.csv', 'b.csv']
    for temp_file in temp_files:
        with open(os.path.join(self.temp_dir, temp_file), 'w') as fp:
            fp.write('A')
    collection_data = [
        {'name': 'ExperimentA',
         'type': 'AnalysisA_html',
         'table': 'experiment',
         'file_path': os.path.join(self.temp_dir, temp_file)}
        for temp_file in temp_files]
    ca = CollectionAdaptor(**{'session': base.session})
    ca.load_file_and_create_collection(
        data=collection_data,
        calculate_file_size_and_md5=False)
    base.close_session()

def test_load_seqrun_files_to_db(self):
    valid_seqrun_dir = find_new_seqrun_dir(path=self.path, dbconfig=self.dbconfig)
    new_seqrun_and_md5 = calculate_file_md5(
        seqrun_info=valid_seqrun_dir,
        md5_out=self.md5_out_path,
        seqrun_path=self.path)
    load_seqrun_files_to_db(
        seqrun_info=valid_seqrun_dir,
        seqrun_md5_info=new_seqrun_and_md5,
        dbconfig=self.dbconfig)
    # check in db
    dbparam = None
    with open(self.dbconfig, 'r') as json_data:
        dbparam = json.load(json_data)
    sra = SeqrunAdaptor(**dbparam)
    sra.start_session()
    sra_data = sra.fetch_seqrun_records_igf_id(seqrun_igf_id='seqrun1')
    sra.close_session()
    self.assertEqual(sra_data.flowcell_id, 'HXXXXXXXX')
    seed_pipeline_table_for_new_seqrun(
        pipeline_name='demultiplexing_fastq',
        dbconfig=self.dbconfig)
    # check in db
    dbparam = None
    with open(self.dbconfig, 'r') as json_data:
        dbparam = json.load(json_data)
    base = BaseAdaptor(**dbparam)
    base.start_session()
    seeds = base.fetch_records(
        query=base.session.query(Seqrun.seqrun_igf_id)
            .join(Pipeline_seed, Pipeline_seed.seed_id == Seqrun.seqrun_id)
            .join(Pipeline, Pipeline.pipeline_id == Pipeline_seed.pipeline_id)
            .filter(Pipeline.pipeline_name == 'demultiplexing_fastq')
            .filter(Pipeline_seed.seed_table == 'seqrun'),
        output_mode='object')
    base.close_session()
    self.assertTrue('seqrun1' in [s.seqrun_igf_id for s in seeds])

class Modify_pipeline_seed:
    '''
    A class for changing pipeline run status in the pipeline_seed table
    '''

    def __init__(self, igf_id_list, table_name, pipeline_name, dbconfig_file,
                 log_slack=True, log_asana=True, slack_config=None,
                 asana_project_id=None, asana_config=None, clean_up=True):
        '''
        :param igf_id_list: A list of igf ids to uniquely identify the entity
        :param table_name: A database table name to look for the igf id,
                           available options are 'project', 'sample', 'experiment',
                           'run', 'file', 'seqrun', 'collection'
        :param pipeline_name: A pipeline name to change the status of the seed
        :param dbconfig_file: A file containing the database configuration
        :param log_slack: A boolean flag for toggling Slack messages, default True
        :param log_asana: A boolean flag for toggling Asana messages, default True
        :param slack_config: A file containing Slack tokens, default None
        :param asana_config: A file containing Asana tokens, default None
        :param asana_project_id: A numeric Asana project id, default None
        :param clean_up: Clean up input file once it is processed, default True
        '''
        try:
            self.igf_id_list = igf_id_list
            if table_name not in ('project', 'sample', 'experiment', 'run',
                                  'file', 'seqrun', 'collection'):
                raise ValueError('Table {0} not supported for pipeline seed'.
                                 format(table_name))
            self.table_name = table_name
            self.pipeline_name = pipeline_name
            self.clean_up = clean_up
            dbparams = read_dbconf_json(dbconfig_file)
            self.base_adaptor = BaseAdaptor(**dbparams)
            self.log_slack = log_slack
            self.log_asana = log_asana
            if log_slack and slack_config is None:
                raise ValueError('Missing slack config file')
            elif log_slack and slack_config:
                self.igf_slack = IGF_slack(slack_config)  # add slack object
            if log_asana and (asana_config is None or asana_project_id is None):
                raise ValueError('Missing asana config file or asana project id')
            elif log_asana and asana_config and asana_project_id:
                self.igf_asana = IGF_asana(asana_config, asana_project_id)  # add asana object
        except:
            raise

    def _fetch_pipeline_seed_entry(self, igf_id, select_seed_status=None,
                                   restrict_seed_status=None):
        '''
        An internal method for fetching a unique pipeline seed entry from the database

        :param igf_id: An igf id to uniquely select pipe seed data
        :param select_seed_status: A list of seed status to include in the query, default None
        :param restrict_seed_status: A list of seed status to exclude from the query, default None
        '''
        try:
            query = None
            if self.table_name == 'seqrun':
                query = (
                    self.base_adaptor.session.query(Pipeline_seed)
                    .join(Seqrun, Pipeline_seed.seed_id == Seqrun.seqrun_id)
                    .join(Pipeline)
                    .filter(Seqrun.seqrun_igf_id == igf_id)
                    .filter(Pipeline_seed.seed_table == self.table_name)
                    .filter(Pipeline.pipeline_id == Pipeline_seed.pipeline_id)
                    .filter(Pipeline.pipeline_name == self.pipeline_name))  # get base query for seqrun table
            else:
                raise ValueError('Table {0} not supported for pipeline status reset'.
                                 format(self.table_name))
            if select_seed_status is not None and \
               isinstance(select_seed_status, list) and \
               len(select_seed_status) > 0:
                query = query.filter(
                    Pipeline_seed.status.in_(select_seed_status))  # add generic select filter
            if restrict_seed_status is not None and \
               isinstance(restrict_seed_status, list) and \
               len(restrict_seed_status) > 0:
                query = query.filter(
                    not_(Pipeline_seed.status.in_(restrict_seed_status)))  # add generic restrict filter
            pipeseed_data = self.base_adaptor.fetch_records(
                query, output_mode='one_or_none')  # fetch unique value for pipeline seed
            return pipeseed_data
        except:
            raise

    def reset_pipeline_seed_for_rerun(self, seeded_label='SEEDED',
                                      restricted_status_list=('SEEDED', 'RUNNING')):
        '''
        A method for setting the pipeline up for re-run if the first run has failed
        or was aborted. It sets pipeline_seed.status to 'SEEDED' only if the status
        is not already 'SEEDED' or 'RUNNING'

        :param seeded_label: A text label for the seeded status, default SEEDED
        :param restricted_status_list: A list of pipeline status to exclude from the
                                       search, default ('SEEDED', 'RUNNING')
        '''
        try:
            db_connected = False
            restricted_status_list = list(restricted_status_list)
            input_id_list = self._read_input_list(
                igf_id_list=self.igf_id_list)  # get input ids from file
            failed_ids = list()  # define empty list of failed ids
            pass_list = list()  # required for logging in asana
            base = self.base_adaptor
            base.start_session()  # connect to database
            db_connected = True
            for igf_id in input_id_list:
                pipe_seed_data = self._fetch_pipeline_seed_entry(
                    igf_id=igf_id,
                    restrict_seed_status=restricted_status_list)  # get pipe seed data for igf id
                if pipe_seed_data is None:
                    failed_ids.append(igf_id)  # add igf id to failed list
                else:
                    pl = PipelineAdaptor(**{'session': base.session})  # connect to pipeline adaptor
                    updated_seed_data = [
                        {'pipeline_id': pipe_seed_data.pipeline_id,
                         'seed_id': pipe_seed_data.seed_id,
                         'seed_table': pipe_seed_data.seed_table,
                         'status': seeded_label}]  # set data for seed update
                    pl.update_pipeline_seed(
                        data=updated_seed_data,
                        autosave=False)  # update data to pipeline seed table
                    pass_list.append(igf_id)
            base.commit_session()  # save data to database after all changes
            base.close_session()  # close database connection
            db_connected = False
            if self.clean_up:
                self._clear_input_list(
                    file_path=self.igf_id_list,
                    igf_list=failed_ids)  # overwrite input list and add failed ids for next try
                message = 'Overwriting pipeseed input list {0}'.format(self.igf_id_list)
                if self.log_slack:
                    self.igf_slack.post_message_to_channel(
                        message, reaction='pass')  # comment to slack for file overwriting
            if len(pass_list) > 0:
                for id_line in pass_list:
                    message = 'Changed pipeline seed for id {0}, pipeline {1}, to {2}'.\
                              format(id_line, self.pipeline_name, seeded_label)
                    if self.log_slack:
                        self.igf_slack.post_message_to_channel(
                            message, reaction='pass')  # comment to slack channel
                    if self.log_asana:
                        self.igf_asana.comment_asana_task(
                            task_name=id_line, comment=message)  # comment on asana task
        except Exception as e:
            if db_connected:
                base.rollback_session()
                base.close_session()
            message = 'Failed to update pipeline seed, Error: {0}'.format(e)
            warnings.warn(message)
            if self.log_slack:
                self.igf_slack.post_message_to_channel(message, reaction='fail')
            raise

    @staticmethod
    def _clear_input_list(file_path, igf_list):
        '''
        A static method for clearing the input list file

        :param file_path: A file containing the list of ids to overwrite
        :param igf_list: A list of ids to write to the file
        '''
        try:
            if not os.path.exists(file_path):
                raise IOError('File {0} not found'.format(file_path))
            with open(file_path, 'w') as fwp:
                fwp.write('\n'.join(igf_list))  # overwrite input list file
        except:
            raise

    @staticmethod
    def _read_input_list(igf_id_list):
        '''
        A static method for reading a list of ids from an input file

        :param igf_id_list: A file containing the input igf ids
        :returns: A list of ids from the input file
        '''
        try:
            if not os.path.exists(igf_id_list):
                raise IOError('File {0} not found'.format(igf_id_list))
            id_list = list()  # define an empty list of igf ids
            with open(igf_id_list, 'r') as fp:
                id_list = [i.strip() for i in fp]  # add ids to the list
            return id_list
        except:
            raise

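# A minimal usage sketch for Modify_pipeline_seed, mirroring the call made in
# test_reset_pipeline_seed_for_rerun above; the input file path below is an
# illustrative assumption, and the file is expected to list one seqrun igf id
# per line.
mps = Modify_pipeline_seed(
    igf_id_list='seqrun_input_list.txt',  # hypothetical path to the id list file
    table_name='seqrun',
    pipeline_name='demultiplexing_fastq',
    dbconfig_file='data/dbconfig.json',
    log_slack=False,
    log_asana=False,
    clean_up=True)
mps.reset_pipeline_seed_for_rerun(
    seeded_label='SEEDED')  # flips FAILED/FINISHED seeds back to SEEDED; SEEDED and RUNNING entries are skipped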