def test_load_file_to_disk_and_db2(self):
    au = Analysis_collection_utils(
        dbsession_class=self.session_class,
        analysis_name='AnalysisA',
        tag_name='TagA',
        collection_name='ProjectA',
        collection_type='AnalysisA_Files',
        collection_table='project')
    input_file_list = [
        os.path.join(self.temp_work_dir, file_name)
        for file_name in self.input_list]
    output_list = au.load_file_to_disk_and_db(
        input_file_list=input_file_list,
        withdraw_exisitng_collection=True)  # withdraw existing collection group before loading new
    base = BaseAdaptor(**{'session_class': self.session_class})
    base.start_session()
    ca = CollectionAdaptor(**{'session': base.session})
    ca_files = ca.get_collection_files(
        collection_name='ProjectA',
        collection_type='AnalysisA_Files',
        output_mode='dataframe')
    self.assertEqual(len(ca_files.index), 1)  # check for unique collection group
    fa = FileAdaptor(**{'session': base.session})
    query = fa.session.query(File)
    fa_records = fa.fetch_records(query=query, output_mode='dataframe')
    self.assertEqual(len(fa_records['file_path'].to_dict()), 3)  # check if all files are present although only one collection group exists
    self.assertEqual(len(output_list), 3)
    base.close_session()
def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    self.platform_json = 'data/platform_db_data.json'
    self.seqrun_json = 'data/seqrun_db_data.json'
    self.pipeline_json = 'data/pipeline_data.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    base.start_session()
    # load platform data
    pl = PlatformAdaptor(**{'session': base.session})
    pl.store_platform_data(data=read_json_data(self.platform_json))
    # load seqrun data
    sra = SeqrunAdaptor(**{'session': base.session})
    sra.store_seqrun_and_attribute_data(data=read_json_data(self.seqrun_json))
    # load pipeline data
    pla = PipelineAdaptor(**{'session': base.session})
    pla.store_pipeline_data(data=read_json_data(self.pipeline_json))
    pipeline_seed_data = [
        {'pipeline_name': 'demultiplexing_fastq',
         'seed_id': '1',
         'seed_table': 'seqrun'}]
    pla.create_pipeline_seed(data=pipeline_seed_data)
    base.close_session()
def test_load_file_to_disk_and_db7(self):
    au = Analysis_collection_utils(
        dbsession_class=self.session_class,
        analysis_name='AnalysisA',
        tag_name='TagA',
        collection_name='RunA',
        collection_type='AnalysisA_Files',
        collection_table='run',
        base_path=self.temp_base_dir)
    input_file_list = [
        os.path.join(self.temp_work_dir, file_name)
        for file_name in self.input_list]
    output_list = au.load_file_to_disk_and_db(
        input_file_list=input_file_list,
        withdraw_exisitng_collection=False)  # loading all files to same collection
    base = BaseAdaptor(**{'session_class': self.session_class})
    base.start_session()
    ca = CollectionAdaptor(**{'session': base.session})
    ca_files = ca.get_collection_files(
        collection_name='RunA',
        collection_type='AnalysisA_Files',
        output_mode='dataframe')
    file_list = list(ca_files['file_path'].to_dict().values())
    datestamp = get_datestamp_label()
    test_file = os.path.join(
        self.temp_base_dir,
        'ProjectA', 'SampleA', 'ExperimentA', 'RunA', 'AnalysisA',
        '{0}_{1}_{2}_{3}.{4}'.format('RunA', 'AnalysisA', 'TagA', datestamp, 'cram'))
    test_file = preprocess_path_name(input_path=test_file)
    self.assertTrue(test_file in file_list)
    self.assertTrue(test_file in output_list)
    base.close_session()
def test_process_project_data_and_account(self):
    fa = Find_and_register_new_project_data(
        projet_info_path=os.path.join('.', 'data/check_project_data'),
        dbconfig=self.dbconfig,
        user_account_template='template/email_notification/send_new_account_info.txt',
        log_slack=False,
        setup_irods=False,
        notify_user=False,
        check_hpc_user=False)
    fa.process_project_data_and_account()
    dbparam = None
    with open(self.dbconfig, 'r') as json_data:
        dbparam = json.load(json_data)
    base = BaseAdaptor(**dbparam)
    base.start_session()
    pa = ProjectAdaptor(**{'session': base.session})
    project_exists = pa.check_project_records_igf_id(
        project_igf_id='IGFP0002_test_23-5-2017_rna')
    self.assertTrue(project_exists)
    ua = UserAdaptor(**{'session': base.session})
    user_exists = ua.check_user_records_email_id(email_id='*****@*****.**')
    self.assertTrue(user_exists)
    user1 = ua.fetch_user_records_email_id(user_email_id='*****@*****.**')
    self.assertEqual(user1.name, 'User2')
    sa = SampleAdaptor(**{'session': base.session})
    sample_exists = sa.check_sample_records_igf_id(sample_igf_id='IGF00006')
    self.assertTrue(sample_exists)
    project_user_exists = pa.check_existing_project_user(
        project_igf_id='IGFP0002_test_23-5-2017_rna',
        email_id='*****@*****.**')
    self.assertTrue(project_user_exists)
    project_user_exists = pa.check_existing_project_user(
        project_igf_id='IGFP0002_test_23-5-2017_rna',
        email_id='*****@*****.**')
    self.assertTrue(project_user_exists)
    base.close_session()
def test_load_file_to_disk_and_db1(self):
    au = Analysis_collection_utils(
        dbsession_class=self.session_class,
        analysis_name='AnalysisA',
        tag_name='TagA',
        collection_name='ProjectA',
        collection_type='AnalysisA_Files',
        collection_table='project')
    input_file_list = [
        os.path.join(self.temp_work_dir, file_name)
        for file_name in self.input_list]
    output_list = au.load_file_to_disk_and_db(
        input_file_list=input_file_list,
        withdraw_exisitng_collection=False)  # loading all files to same collection
    base = BaseAdaptor(**{'session_class': self.session_class})
    base.start_session()
    ca = CollectionAdaptor(**{'session': base.session})
    ca_files = ca.get_collection_files(
        collection_name='ProjectA',
        collection_type='AnalysisA_Files',
        output_mode='dataframe')
    self.assertEqual(len(ca_files.index), len(self.input_list))  # compare with input list
    self.assertEqual(len(output_list), len(self.input_list))  # compare with output list
    base.close_session()
def test_load_file_to_disk_and_db4(self):
    au = Analysis_collection_utils(
        dbsession_class=self.session_class,
        analysis_name='AnalysisA',
        tag_name='TagA',
        collection_name='ProjectA',
        collection_type='AnalysisA_Files',
        collection_table='project',
        rename_file=False)
    input_file_list = [
        os.path.join(self.temp_work_dir, file_name)
        for file_name in self.input_list]
    output_list = au.load_file_to_disk_and_db(
        input_file_list=input_file_list,
        withdraw_exisitng_collection=False)  # loading all files to same collection, without rename
    base = BaseAdaptor(**{'session_class': self.session_class})
    base.start_session()
    ca = CollectionAdaptor(**{'session': base.session})
    ca_files = ca.get_collection_files(
        collection_name='ProjectA',
        collection_type='AnalysisA_Files',
        output_mode='dataframe')
    file_list = list(ca_files['file_path'].to_dict().values())
    self.assertTrue(input_file_list[0] in file_list)
    self.assertTrue(input_file_list[0] in output_list)
    base.close_session()
def _fetch_track_files_with_metadata(self, level='experiment'):
    '''
    An internal method for fetching track files with the metadata information

    :param level: Specific level for fetching metadata information, default 'experiment'
    :returns: A pandas dataframe object
    '''
    try:
        if level == 'experiment':
            base = BaseAdaptor(**{'session_class': self.dbsession_class})
            base.start_session()
            query = (
                base.session
                .query(
                    Project.project_igf_id,
                    Sample.sample_igf_id,
                    Experiment.experiment_igf_id,
                    Experiment.library_source,
                    Experiment.library_strategy,
                    Experiment.experiment_type,
                    Collection.name,
                    Collection.type,
                    File.file_path,
                    Pipeline.pipeline_name,
                    Pipeline_seed.status)
                .join(Sample, Project.project_id==Sample.project_id)
                .join(Experiment, Sample.sample_id==Experiment.sample_id)
                .join(Collection, Collection.name==Experiment.experiment_igf_id)
                .join(Collection_group, Collection.collection_id==Collection_group.collection_id)
                .join(File, File.file_id==Collection_group.file_id)
                .join(Pipeline_seed, Pipeline_seed.seed_id==Experiment.experiment_id)
                .join(Pipeline, Pipeline.pipeline_id==Pipeline_seed.pipeline_id)
                .filter(Project.project_id==Sample.project_id)
                .filter(Sample.sample_id==Experiment.sample_id)
                .filter(Sample.status=='ACTIVE')
                .filter(Experiment.status=='ACTIVE')
                .filter(Collection.type.in_(self.collection_type_list))
                .filter(Collection.table==self.collection_table)
                .filter(Collection.collection_id==Collection_group.collection_id)
                .filter(File.file_id==Collection_group.file_id)
                .filter(File.status=='ACTIVE')
                .filter(Pipeline_seed.status=='FINISHED')
                .filter(Pipeline.pipeline_id==Pipeline_seed.pipeline_id)
                .filter(Pipeline.pipeline_name==self.pipeline_name)
                .filter(Project.project_igf_id==self.project_igf_id))
            records = base.fetch_records(query=query, output_mode='dataframe')
            base.close_session()
            return records
        else:
            raise ValueError('No support for {0} tracks'.format(level))
    except:
        raise
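# --- Usage sketch (illustrative, not from the repo) ---
# The method above only reads five attributes from self (dbsession_class,
# collection_type_list, collection_table, pipeline_name, project_igf_id),
# so it can be exercised against a stand-in object; the attribute values
# and the 'STAR_BIGWIG' collection type below are assumptions:
from types import SimpleNamespace

stub = SimpleNamespace(
    dbsession_class=session_class,         # an existing sqlalchemy session class
    collection_type_list=['STAR_BIGWIG'],  # hypothetical track collection type
    collection_table='experiment',
    pipeline_name='PrimaryAnalysis',
    project_igf_id='ProjectA')
records = _fetch_track_files_with_metadata(stub, level='experiment')
print(records[['experiment_igf_id', 'file_path']].head())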
def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    self.new_project_data = 'data/check_project_data/new_project_data.csv'
    dbparam = None
    with open(self.dbconfig, 'r') as json_data:
        dbparam = json.load(json_data)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.session_class
    base.start_session()
    ua = UserAdaptor(**{'session': base.session})
    user_data = [
        {'name': 'user1', 'email_id': '*****@*****.**', 'username': '******'},
        {'name': 'igf', 'email_id': '*****@*****.**', 'username': '******'}]
    ua.store_user_data(data=user_data)
    project_data = [
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X'}]
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)
    project_user_data = [
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'email_id': '*****@*****.**',
         'data_authority': True}]
    pa.assign_user_to_project(data=project_user_data)
    sample_data = [
        {'sample_igf_id': 'IGF00001', 'project_igf_id': 'IGFP0001_test_22-8-2017_rna'},
        {'sample_igf_id': 'IGF00002', 'project_igf_id': 'IGFP0001_test_22-8-2017_rna'},
        {'sample_igf_id': 'IGF00003', 'project_igf_id': 'IGFP0001_test_22-8-2017_rna'},
        {'sample_igf_id': 'IGF00004', 'project_igf_id': 'IGFP0001_test_22-8-2017_rna'},
        {'sample_igf_id': 'IGF00005', 'project_igf_id': 'IGFP0001_test_22-8-2017_rna'}]
    sa = SampleAdaptor(**{'session': base.session})
    sa.store_sample_and_attribute_data(data=sample_data)
    base.close_session()
    new_project_data = [
        {'project_igf_id': 'IGFP0002_test_23-5-2017_rna',
         'name': 'user2',
         'email_id': '*****@*****.**',
         'sample_igf_id': 'IGF00006'},
        {'project_igf_id': 'IGFP0003_test_24-8-2017_rna',
         'name': 'user2',
         'email_id': '*****@*****.**',
         'sample_igf_id': 'IGF00007',
         'barcode_check': 'OFF'}]
    pd.DataFrame(new_project_data).to_csv(os.path.join('.', self.new_project_data))
def test_check_existing_data(self):
    fa = Find_and_register_new_project_data(
        projet_info_path=os.path.join('.', 'data/check_project_data'),
        dbconfig=self.dbconfig,
        user_account_template='template/email_notification/send_new_account_info.txt',
        log_slack=False,
        check_hpc_user=False)
    project_data1 = pd.DataFrame([
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna'},
        {'project_igf_id': 'IGFP0002_test_23-5-2017_rna'}])
    base = BaseAdaptor(**{'session_class': self.session_class})
    base.start_session()
    project_data1 = project_data1.apply(
        lambda x: fa._check_existing_data(
            data=x,
            dbsession=base.session,
            table_name='project'),
        axis=1)
    project_data1 = project_data1[project_data1['EXISTS'] == False].to_dict(orient='records')
    self.assertEqual(project_data1[0]['project_igf_id'], 'IGFP0002_test_23-5-2017_rna')
    user_data1 = pd.DataFrame([
        {'name': 'user1', 'email_id': '*****@*****.**'},
        {'name': 'user3', 'email_id': '*****@*****.**'}])
    user_data1 = user_data1.apply(
        lambda x: fa._check_existing_data(
            data=x,
            dbsession=base.session,
            table_name='user'),
        axis=1)
    user_data1 = user_data1[user_data1['EXISTS'] == False].to_dict(orient='records')
    self.assertEqual(user_data1[0]['email_id'], '*****@*****.**')
    sample_data1 = pd.DataFrame([
        {'sample_igf_id': 'IGF00001', 'project_igf_id': 'IGFP0001_test_22-8-2017_rna'},
        {'sample_igf_id': 'IGF00007', 'project_igf_id': 'IGFP0001_test_22-8-2017_rna'}])
    sample_data1 = sample_data1.apply(
        lambda x: fa._check_existing_data(
            data=x,
            dbsession=base.session,
            table_name='sample'),
        axis=1)
    sample_data1 = sample_data1[sample_data1['EXISTS'] == False].to_dict(orient='records')
    self.assertEqual(sample_data1[0]['sample_igf_id'], 'IGF00007')
    project_user_data1 = pd.DataFrame([
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'email_id': '*****@*****.**'},
        {'project_igf_id': 'IGFP0002_test_23-5-2017_rna',
         'email_id': '*****@*****.**'}])
    project_user_data1 = project_user_data1.apply(
        lambda x: fa._check_existing_data(
            data=x,
            dbsession=base.session,
            table_name='project_user'),
        axis=1)
    project_user_data1 = project_user_data1[project_user_data1['EXISTS'] == False].to_dict(orient='records')
    self.assertEqual(project_user_data1[0]['project_igf_id'], 'IGFP0002_test_23-5-2017_rna')
    base.close_session()
def test_reset_pipeline_seed_for_rerun(self):
    base = BaseAdaptor(**{'session_class': self.session_class})
    base.start_session()
    sra = SeqrunAdaptor(**{'session': base.session})
    seqrun = sra.fetch_seqrun_records_igf_id(
        seqrun_igf_id='171003_M00001_0089_000000000-TEST')
    pp = PipelineAdaptor(**{'session': base.session})
    pipeline = pp.fetch_pipeline_records_pipeline_name('demultiplexing_fastq')
    pipe_seed = pp.fetch_pipeline_seed(
        pipeline_id=pipeline.pipeline_id,
        seed_id=seqrun.seqrun_id,
        seed_table='seqrun')
    self.assertEqual(pipe_seed.status, 'SEEDED')
    pp.update_pipeline_seed(data=[
        {'pipeline_id': pipeline.pipeline_id,
         'seed_id': seqrun.seqrun_id,
         'seed_table': 'seqrun',
         'status': 'FINISHED'}])
    pipe_seed2 = pp.fetch_pipeline_seed(
        pipeline_id=pipeline.pipeline_id,
        seed_id=seqrun.seqrun_id,
        seed_table='seqrun')
    self.assertEqual(pipe_seed2.status, 'FINISHED')
    base.close_session()
    with open(self.seqrun_input_list, 'w') as fp:
        fp.write('171003_M00001_0089_000000000-TEST')
    mps = Modify_pipeline_seed(
        igf_id_list=self.seqrun_input_list,
        table_name='seqrun',
        pipeline_name='demultiplexing_fastq',
        dbconfig_file=self.dbconfig,
        log_slack=False,
        log_asana=False,
        clean_up=True)
    mps.reset_pipeline_seed_for_rerun(seeded_label='SEEDED')
    base.start_session()
    sra = SeqrunAdaptor(**{'session': base.session})
    seqrun = sra.fetch_seqrun_records_igf_id(
        seqrun_igf_id='171003_M00001_0089_000000000-TEST')
    pp = PipelineAdaptor(**{'session': base.session})
    pipeline = pp.fetch_pipeline_records_pipeline_name('demultiplexing_fastq')
    pipe_seed = pp.fetch_pipeline_seed(
        pipeline_id=pipeline.pipeline_id,
        seed_id=seqrun.seqrun_id,
        seed_table='seqrun')
    self.assertEqual(pipe_seed.status, 'SEEDED')
    base.close_session()
def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.drop_all(self.engine)
    if os.path.exists(self.dbname):
        os.remove(self.dbname)
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    base = BaseAdaptor(**{'session_class': self.session_class})
    base.start_session()
    platform_data = [
        {"platform_igf_id": "M001",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"}]  # platform data
    flowcell_rule_data = [
        {"platform_igf_id": "M001",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"}]  # flowcell rule data
    pl = PlatformAdaptor(**{'session': base.session})
    pl.store_platform_data(data=platform_data)  # loading platform data
    pl.store_flowcell_barcode_rule(data=flowcell_rule_data)  # loading flowcell rules data
    project_data = [{'project_igf_id': 'ProjectA'}]  # project data
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)  # load project data
    sample_data = [
        {'sample_igf_id': 'SampleA',
         'project_igf_id': 'ProjectA'}]  # sample data
    sa = SampleAdaptor(**{'session': base.session})
    sa.store_sample_and_attribute_data(data=sample_data)  # store sample data
    seqrun_data = [
        {'seqrun_igf_id': 'SeqrunA',
         'flowcell_id': '000000000-D0YLK',
         'platform_igf_id': 'M001',
         'flowcell': 'MISEQ'}]  # seqrun data
    sra = SeqrunAdaptor(**{'session': base.session})
    sra.store_seqrun_and_attribute_data(data=seqrun_data)  # load seqrun data
    experiment_data = [
        {'experiment_igf_id': 'ExperimentA',
         'sample_igf_id': 'SampleA',
         'library_name': 'SampleA',
         'platform_name': 'MISEQ',
         'project_igf_id': 'ProjectA'}]  # experiment data
    ea = ExperimentAdaptor(**{'session': base.session})
    ea.store_project_and_attribute_data(data=experiment_data)  # load experiment data
    base.commit_session()
    base.close_session()
def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    project_data = [
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X'},
        {'project_igf_id': 'IGFP0002_test_22-8-2017_rna',
         'project_name': 'test_23-8-2017_rna',
         'description': 'Its project 2',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X'}]
    base.start_session()
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)
    sa = SampleAdaptor(**{'session': base.session})
    sample_data = [
        {'sample_igf_id': 'IGFS001',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna'},
        {'sample_igf_id': 'IGFS002',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna'},
        {'sample_igf_id': 'IGFS003',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna'},
        {'sample_igf_id': 'IGFS004',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'status': 'FAILED'}]
    sa.store_sample_and_attribute_data(data=sample_data)
    base.close_session()
def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    project_data = [
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X'},
        {'project_igf_id': 'IGFP0002_test_22-8-2017_rna',
         'project_name': 'test_23-8-2017_rna',
         'description': 'Its project 2',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X'}]
    user_data = [
        {'name': 'UserA',
         'email_id': '*****@*****.**',
         'username': '******'}]
    project_user_data = [
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'email_id': '*****@*****.**',
         'data_authority': True},
        {'project_igf_id': 'IGFP0002_test_22-8-2017_rna',
         'email_id': '*****@*****.**'}]
    base.start_session()
    ua = UserAdaptor(**{'session': base.session})
    ua.store_user_data(data=user_data)
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)
    pa.assign_user_to_project(data=project_user_data)
    base.close_session()
def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    self.platform_json = 'data/platform_db_data.json'
    self.seqrun_json = 'data/seqrun_db_data.json'
    self.pipeline_json = 'data/pipeline_data.json'
    self.flowcell_rules_json = 'data/flowcell_rules.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    base.start_session()
    # load platform data
    pl = PlatformAdaptor(**{'session': base.session})
    pl.store_platform_data(data=read_json_data(self.platform_json))
    pl.store_flowcell_barcode_rule(data=read_json_data(self.flowcell_rules_json))
    # load seqrun data
    sra = SeqrunAdaptor(**{'session': base.session})
    sra.store_seqrun_and_attribute_data(data=read_json_data(self.seqrun_json))
    base.close_session()
def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.drop_all(self.engine)
    if os.path.exists(self.dbname):
        os.remove(self.dbname)
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    base.start_session()
    project_data = [{'project_igf_id': 'ProjectA'}]
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)  # load project data
    sample_data = [
        {'sample_igf_id': 'SampleA',
         'project_igf_id': 'ProjectA'}]  # sample data
    sa = SampleAdaptor(**{'session': base.session})
    sa.store_sample_and_attribute_data(data=sample_data)  # store sample data
    experiment_data = [
        {'experiment_igf_id': 'ExperimentA',
         'sample_igf_id': 'SampleA',
         'library_name': 'SampleA',
         'platform_name': 'MISEQ',
         'project_igf_id': 'ProjectA'}]  # experiment data
    ea = ExperimentAdaptor(**{'session': base.session})
    ea.store_project_and_attribute_data(data=experiment_data)
    self.temp_dir = get_temp_dir()
    temp_files = ['a.csv', 'b.csv']
    for temp_file in temp_files:
        with open(os.path.join(self.temp_dir, temp_file), 'w') as fp:
            fp.write('A')
    collection_data = [
        {'name': 'ExperimentA',
         'type': 'AnalysisA_html',
         'table': 'experiment',
         'file_path': os.path.join(self.temp_dir, temp_file)}
        for temp_file in temp_files]
    ca = CollectionAdaptor(**{'session': base.session})
    ca.load_file_and_create_collection(
        data=collection_data,
        calculate_file_size_and_md5=False)
    base.close_session()
def test_load_seqrun_files_to_db(self):
    valid_seqrun_dir = find_new_seqrun_dir(path=self.path, dbconfig=self.dbconfig)
    new_seqrun_and_md5 = calculate_file_md5(
        seqrun_info=valid_seqrun_dir,
        md5_out=self.md5_out_path,
        seqrun_path=self.path)
    load_seqrun_files_to_db(
        seqrun_info=valid_seqrun_dir,
        seqrun_md5_info=new_seqrun_and_md5,
        dbconfig=self.dbconfig)
    # check in db
    dbparam = None
    with open(self.dbconfig, 'r') as json_data:
        dbparam = json.load(json_data)
    sra = SeqrunAdaptor(**dbparam)
    sra.start_session()
    sra_data = sra.fetch_seqrun_records_igf_id(seqrun_igf_id='seqrun1')
    sra.close_session()
    self.assertEqual(sra_data.flowcell_id, 'HXXXXXXXX')
    seed_pipeline_table_for_new_seqrun(
        pipeline_name='demultiplexing_fastq',
        dbconfig=self.dbconfig)
    # check in db
    dbparam = None
    with open(self.dbconfig, 'r') as json_data:
        dbparam = json.load(json_data)
    base = BaseAdaptor(**dbparam)
    base.start_session()
    seeds = base.fetch_records(
        query=base.session
            .query(Seqrun.seqrun_igf_id)
            .join(Pipeline_seed, Pipeline_seed.seed_id==Seqrun.seqrun_id)
            .join(Pipeline, Pipeline.pipeline_id==Pipeline_seed.pipeline_id)
            .filter(Pipeline.pipeline_name=='demultiplexing_fastq')
            .filter(Pipeline_seed.seed_table=='seqrun'),
        output_mode='object')
    base.close_session()
    self.assertTrue('seqrun1' in [s.seqrun_igf_id for s in seeds])
def test_create_or_update_analysis_collection_rename(self):
    au = Analysis_collection_utils(
        dbsession_class=self.session_class,
        analysis_name='AnalysisA',
        tag_name='TagA',
        collection_name='ProjectA',
        collection_type='AnalysisA_Files',
        collection_table='project')
    base = BaseAdaptor(**{'session_class': self.session_class})
    base.start_session()
    au.create_or_update_analysis_collection(
        file_path=os.path.join(self.temp_work_dir, 'a.cram'),
        dbsession=base.session,
        autosave_db=True)
    base.close_session()
    base.start_session()
    ca = CollectionAdaptor(**{'session': base.session})
    ca_files = ca.get_collection_files(
        collection_name='ProjectA',
        collection_type='AnalysisA_Files',
        output_mode='dataframe')
    self.assertEqual(len(ca_files.index), 1)
    au.create_or_update_analysis_collection(
        file_path=os.path.join(self.temp_work_dir, 'a.cram'),
        dbsession=base.session,
        autosave_db=True,
        force=True)  # overwriting file collection
    base.close_session()
    base.start_session()
    ca = CollectionAdaptor(**{'session': base.session})
    ca_files = ca.get_collection_files(
        collection_name='ProjectA',
        collection_type='AnalysisA_Files',
        output_mode='dataframe')
    self.assertEqual(len(ca_files.index), 1)
    with self.assertRaises(sqlalchemy.exc.IntegrityError):  # file collection without force
        au.create_or_update_analysis_collection(
            file_path=os.path.join(self.temp_work_dir, 'a.cram'),
            dbsession=base.session,
            autosave_db=True,
            force=False)
    base.close_session()
def _build_and_store_exp_run_and_collection_in_db(self, fastq_files_list,
                                                  restricted_list=('10X',)):
    '''
    An internal method for building db collections for the raw fastq files
    '''
    session_class = self.session_class
    db_connected = False
    try:
        restricted_list = list(restricted_list)
        dataframe = pd.DataFrame(fastq_files_list)
        # calculate additional detail
        dataframe = dataframe.apply(
            lambda data: self._calculate_experiment_run_and_file_info(
                data, restricted_list),
            axis=1)
        # get file data
        file_group_columns = [
            'name', 'type', 'location',
            'R1', 'R1_md5', 'R1_size',
            'R2', 'R2_md5', 'R2_size']
        file_group_data = dataframe.loc[:, file_group_columns]
        file_group_data = file_group_data.drop_duplicates()
        (file_data, file_group_data) = \
            self._reformat_file_group_data(data=file_group_data)
        # get base session
        base = BaseAdaptor(**{'session_class': session_class})
        base.start_session()
        db_connected = True
        # get experiment data
        experiment_columns = base.get_table_columns(
            table_name=Experiment,
            excluded_columns=['experiment_id', 'project_id', 'sample_id'])
        experiment_columns.extend(['project_igf_id', 'sample_igf_id'])
        exp_data = dataframe.loc[:, experiment_columns]
        exp_data = exp_data.drop_duplicates()
        if exp_data.index.size > 0:
            exp_data = exp_data.apply(
                lambda x: self._check_existing_data(
                    data=x,
                    dbsession=base.session,
                    table_name='experiment',
                    check_column='EXISTS'),
                axis=1)
            exp_data = exp_data[exp_data['EXISTS'] == False]  # filter existing experiments
            exp_data.drop('EXISTS', axis=1, inplace=True)  # remove extra columns
            exp_data = exp_data[pd.isnull(exp_data['experiment_igf_id']) == False]  # filter exp with null values
        # get run data
        run_columns = base.get_table_columns(
            table_name=Run,
            excluded_columns=[
                'run_id', 'seqrun_id', 'experiment_id',
                'date_created', 'status'])
        run_columns.extend([
            'seqrun_igf_id', 'experiment_igf_id',
            'R1_READ_COUNT', 'R2_READ_COUNT'])
        run_data = dataframe.loc[:, run_columns]
        run_data = run_data.drop_duplicates()
        if run_data.index.size > 0:
            run_data = run_data.apply(
                lambda x: self._check_existing_data(
                    data=x,
                    dbsession=base.session,
                    table_name='run',
                    check_column='EXISTS'),
                axis=1)
            run_data = run_data[run_data['EXISTS'] == False]  # filter existing runs
            run_data.drop('EXISTS', axis=1, inplace=True)  # remove extra columns
            run_data = run_data[pd.isnull(run_data['run_igf_id']) == False]  # filter run with null values
        # get collection data
        collection_columns = ['name', 'type', 'table']
        collection_data = dataframe.loc[:, collection_columns]
        collection_data = collection_data.drop_duplicates()
        if collection_data.index.size > 0:
            collection_data = collection_data.apply(
                lambda x: self._check_existing_data(
                    data=x,
                    dbsession=base.session,
                    table_name='collection',
                    check_column='EXISTS'),
                axis=1)
            collection_data = collection_data[collection_data['EXISTS'] == False]  # filter existing collection
            collection_data.drop('EXISTS', axis=1, inplace=True)  # remove extra columns
            collection_data = collection_data[pd.isnull(collection_data['name']) == False]  # filter collection with null values
        # store experiment to db
        if exp_data.index.size > 0:
            ea = ExperimentAdaptor(**{'session': base.session})
            ea.store_project_and_attribute_data(data=exp_data, autosave=False)
            base.session.flush()
        # store run to db
        if run_data.index.size > 0:
            ra = RunAdaptor(**{'session': base.session})
            ra.store_run_and_attribute_data(data=run_data, autosave=False)
            base.session.flush()
        # store file to db
        fa = FileAdaptor(**{'session': base.session})
        fa.store_file_and_attribute_data(data=file_data, autosave=False)
        base.session.flush()
        # store collection to db
        ca = CollectionAdaptor(**{'session': base.session})
        if collection_data.index.size > 0:
            ca.store_collection_and_attribute_data(
                data=collection_data,
                autosave=False)
            base.session.flush()
        ca.create_collection_group(data=file_group_data, autosave=False)
        base.commit_session()
        self._write_manifest_file(file_data)
    except:
        if db_connected:
            base.rollback_session()
        raise
    finally:
        if db_connected:
            base.close_session()
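# --- Input sketch (illustrative, not from the repo) ---
# The method above expects fastq_files_list as a list of per-run dicts that
# _calculate_experiment_run_and_file_info() can extend with the experiment,
# run, collection and file-group columns used above; the exact keys are set
# by the caller, so the ones shown here are assumptions:
fastq_files_list = [
    {'project_igf_id': 'ProjectA',
     'sample_igf_id': 'SampleA',
     'seqrun_igf_id': 'SeqrunA',
     'R1': '/path/SampleA_S1_L001_R1_001.fastq.gz',   # hypothetical paths
     'R1_md5': 'd41d8cd98f00b204e9800998ecf8427e',
     'R1_size': 1024,
     'R2': '/path/SampleA_S1_L001_R2_001.fastq.gz',
     'R2_md5': 'd41d8cd98f00b204e9800998ecf8427e',
     'R2_size': 1024}]
# self._build_and_store_exp_run_and_collection_in_db(fastq_files_list)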
def setUp(self): self.dbconfig = 'data/dbconfig.json' dbparam = read_dbconf_json(self.dbconfig) base = BaseAdaptor(**dbparam) self.engine = base.engine self.dbname = dbparam['dbname'] Base.metadata.create_all(self.engine) self.session_class = base.get_session_class() base.start_session() platform_data = [{ "platform_igf_id": "M00001", "model_name": "MISEQ", "vendor_name": "ILLUMINA", "software_name": "RTA", "software_version": "RTA1.18.54" }, { "platform_igf_id": "NB500000", "model_name": "NEXTSEQ", "vendor_name": "ILLUMINA", "software_name": "RTA", "software_version": "RTA2" }, { "platform_igf_id": "K00000", "model_name": "HISEQ4000", "vendor_name": "ILLUMINA", "software_name": "RTA", "software_version": "RTA2" }] flowcell_rule_data = [{ "platform_igf_id": "K00000", "flowcell_type": "HiSeq 3000/4000 SR", "index_1": "NO_CHANGE", "index_2": "NO_CHANGE" }, { "platform_igf_id": "K00000", "flowcell_type": "HiSeq 3000/4000 PE", "index_1": "NO_CHANGE", "index_2": "REVCOMP" }, { "platform_igf_id": "NB500000", "flowcell_type": "NEXTSEQ", "index_1": "NO_CHANGE", "index_2": "REVCOMP" }, { "platform_igf_id": "M00001", "flowcell_type": "MISEQ", "index_1": "NO_CHANGE", "index_2": "NO_CHANGE" }] pl = PlatformAdaptor(**{'session': base.session}) pl.store_platform_data(data=platform_data) pl.store_flowcell_barcode_rule(data=flowcell_rule_data) seqrun_data = [{ 'seqrun_igf_id': '171003_M00001_0089_000000000-TEST', 'flowcell_id': '000000000-D0YLK', 'platform_igf_id': 'M00001', 'flowcell': 'MISEQ', }] sra = SeqrunAdaptor(**{'session': base.session}) sra.store_seqrun_and_attribute_data(data=seqrun_data) seqrun = sra.fetch_seqrun_records_igf_id( seqrun_igf_id='171003_M00001_0089_000000000-TEST') pipeline_data = [{ "pipeline_name": "demultiplexing_fastq", "pipeline_db": "sqlite:////data/bcl2fastq.db", "pipeline_init_conf": { "input_dir": "data/seqrun_dir/", "output_dir": "data" }, "pipeline_run_conf": { "output_dir": "data" } }] pipeseed_data = [{ "pipeline_name": "demultiplexing_fastq", "seed_table": "seqrun", "seed_id": seqrun.seqrun_id }] pp = PipelineAdaptor(**{'session': base.session}) pp.store_pipeline_data(data=pipeline_data) pp.create_pipeline_seed( data=pipeseed_data, required_columns=['pipeline_id', 'seed_id', 'seed_table']) base.close_session() self.seqrun_input_list = 'data/reset_samplesheet_md5/seqrun_pipeline_reset_list.txt' with open(self.seqrun_input_list, 'w') as fp: fp.write('')
pipeline_seed_data = [
    {'pipeline_name': 'PrimaryAnalysis',
     'seed_id': 1,
     'seed_table': 'experiment'},
    {'pipeline_name': 'PrimaryAnalysis',
     'seed_id': 2,
     'seed_table': 'experiment'},
    {'pipeline_name': 'PrimaryAnalysis',
     'seed_id': 3,
     'seed_table': 'experiment'}]
pla.store_pipeline_data(data=pipeline_data)
pla.create_pipeline_seed(data=pipeline_seed_data)
base.commit_session()
base.close_session()
ps = Project_status(
    igf_session_class=base.get_session_class(),
    project_igf_id='ProjectA')
# print(ps.get_seqrun_info(demultiplexing_pipeline='DemultiplexIlluminaFastq'))
# print(ps.get_seqrun_info(active_seqrun_igf_id='SeqrunA'))
# print(ps.get_seqrun_info(demultiplexing_pipeline='DemultiplexIlluminaFastq',
#                          active_seqrun_igf_id='180410_K00345_0063_AHWL7CBBXX'))
# print(ps.get_status_description())
# print(ps.get_status_column_order())
# print(ps.get_analysis_info(analysis_pipeline='PrimaryAnalysis'))
# ps.generate_gviz_json_file(output_file='a',
#                            demultiplexing_pipeline='DemultiplexIlluminaFastq',
#                            analysis_pipeline='PrimaryAnalysis',
#                            active_seqrun_igf_id='180410_K00345_0063_AHWL7CBBXX')
Base.metadata.drop_all(engine)
os.remove(dbname)
def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    base.start_session()
    platform_data = [
        {"platform_igf_id": "M03291",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"}]
    flowcell_rule_data = [
        {"platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"}]
    pl = PlatformAdaptor(**{'session': base.session})
    pl.store_platform_data(data=platform_data)
    pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
    project_data = [{'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq'}]
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)
    sample_data = [
        {'sample_igf_id': 'IGF103923',
         'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq',
         'species_name': 'HG38'}]
    sa = SampleAdaptor(**{'session': base.session})
    sa.store_sample_and_attribute_data(data=sample_data)
    seqrun_data = [
        {'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'flowcell_id': '000000000-BRN47',
         'platform_igf_id': 'M03291',
         'flowcell': 'MISEQ'}]
    sra = SeqrunAdaptor(**{'session': base.session})
    sra.store_seqrun_and_attribute_data(data=seqrun_data)
    pipeline_data = [
        {"pipeline_name": "PrimaryAnalysis",
         "pipeline_db": "sqlite:////bcl2fastq.db"},
        {"pipeline_name": "DemultiplexIlluminaFastq",
         "pipeline_db": "sqlite:////bcl2fastq.db"}]
    pla = PipelineAdaptor(**{'session': base.session})
    pla.store_pipeline_data(data=pipeline_data)
    file_data = [
        {'file_path': '/path/S20180405S_S1_L001_R1_001.fastq.gz',
         'location': 'HPC_PROJECT',
         'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
         'size': '1528121404'},
        {'file_path': '/path/S20180405S_S1_L001_R2_001.fastq.gz',
         'location': 'HPC_PROJECT',
         'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
         'size': '1467047580'},
        {'file_path': '/path/S20180405S_S3_L001_R2_001.fastq.gz',
         'location': 'HPC_PROJECT',
         'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
         'size': '1467047580'}]
    fa = FileAdaptor(**{'session': base.session})
    fa.store_file_and_attribute_data(data=file_data)
    collection_data = [
        {'name': 'IGF103923_MISEQ_000000000-BRN47_1',
         'type': 'demultiplexed_fastq',
         'table': 'run'},
        {'name': 'IGF103923_MISEQ1_000000000-BRN47_1',
         'type': 'demultiplexed_fastq',
         'table': 'run'}]
    collection_files_data = [
        {'name': 'IGF103923_MISEQ_000000000-BRN47_1',
         'type': 'demultiplexed_fastq',
         'file_path': '/path/S20180405S_S1_L001_R1_001.fastq.gz'},
        {'name': 'IGF103923_MISEQ_000000000-BRN47_1',
         'type': 'demultiplexed_fastq',
         'file_path': '/path/S20180405S_S1_L001_R2_001.fastq.gz'},
        {'name': 'IGF103923_MISEQ1_000000000-BRN47_1',
         'type': 'demultiplexed_fastq',
         'file_path': '/path/S20180405S_S3_L001_R2_001.fastq.gz'}]
    ca = CollectionAdaptor(**{'session': base.session})
    ca.store_collection_and_attribute_data(data=collection_data)
    ca.create_collection_group(data=collection_files_data)
    experiment_data = [
        {'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq',
         'sample_igf_id': 'IGF103923',
         'experiment_igf_id': 'IGF103923_MISEQ',
         'library_name': 'IGF103923',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'TENX-TRANSCRIPTOME-3P',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ'},
        {'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq',
         'sample_igf_id': 'IGF103923',
         'experiment_igf_id': 'IGF103923_MISEQ1',
         'library_name': 'IGF103923_1',
         'library_source': 'GENOMIC_SINGLE_CELL',
         'library_strategy': 'WGS',
         'experiment_type': 'UNKNOWN',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ'}]
    ea = ExperimentAdaptor(**{'session': base.session})
    ea.store_project_and_attribute_data(data=experiment_data)
    run_data = [
        {'experiment_igf_id': 'IGF103923_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'IGF103923_MISEQ_000000000-BRN47_1',
         'lane_number': '1'},
        {'experiment_igf_id': 'IGF103923_MISEQ1',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'IGF103923_MISEQ1_000000000-BRN47_1',
         'lane_number': '1'}]
    ra = RunAdaptor(**{'session': base.session})
    ra.store_run_and_attribute_data(data=run_data)
    base.close_session()
def _check_and_register_data(self, data, project_info_file):
    '''
    An internal method for checking and registering data

    :param data: A dictionary containing the following keys

        project_data
        user_data
        project_user_data
        sample_data

    :param project_info_file: A filepath for project info
    '''
    try:
        db_connected = False
        project_data = pd.DataFrame(data['project_data'])
        user_data = pd.DataFrame(data['user_data'])
        project_user_data = pd.DataFrame(data['project_user_data'])
        sample_data = pd.DataFrame(data['sample_data'])
        base = BaseAdaptor(**{'session_class': self.session_class})
        base.start_session()  # connect to db
        db_connected = True
        project_data = project_data[project_data[self.project_lookup_column].isnull() == False]
        project_data = project_data.drop_duplicates()
        if project_data.index.size > 0:
            project_data = project_data.apply(
                lambda x: self._check_existing_data(
                    data=x,
                    dbsession=base.session,
                    table_name='project',
                    check_column='EXISTS'),
                axis=1)  # get project map
            project_data = project_data[project_data['EXISTS'] == False]  # filter existing projects
            project_data.drop('EXISTS', axis=1, inplace=True)  # remove extra column
        user_data = user_data[user_data[self.user_lookup_column].isnull() == False]
        user_data = user_data.drop_duplicates()
        if user_data.index.size > 0:
            user_data = user_data.apply(
                lambda x: self._assign_username_and_password(x),
                axis=1)  # check for user account and password
            user_data = user_data.apply(
                lambda x: self._check_existing_data(
                    data=x,
                    dbsession=base.session,
                    table_name='user',
                    check_column='EXISTS'),
                axis=1)  # get user map
            user_data = user_data[user_data['EXISTS'] == False]  # filter existing users
            user_data.drop('EXISTS', axis=1, inplace=True)  # remove extra column
        sample_data = sample_data[sample_data[self.sample_lookup_column].isnull() == False]
        sample_data = sample_data.drop_duplicates()
        if sample_data.index.size > 0:
            sample_data = sample_data.apply(
                lambda x: self._check_existing_data(
                    data=x,
                    dbsession=base.session,
                    table_name='sample',
                    check_column='EXISTS'),
                axis=1)  # get sample map
            sample_data = sample_data[sample_data['EXISTS'] == False]  # filter existing samples
            sample_data.drop('EXISTS', axis=1, inplace=True)  # remove extra column
        project_user_data = project_user_data.drop_duplicates()
        project_user_data_mask = \
            (project_user_data[self.project_lookup_column].isnull() == False) & \
            (project_user_data[self.user_lookup_column].isnull() == False)
        project_user_data = project_user_data[project_user_data_mask]  # not allowing any empty values for project or user lookup
        if project_user_data.index.size > 0:
            project_user_data = self._add_default_user_to_project(project_user_data)  # update project_user_data with default users
            project_user_data = project_user_data.apply(
                lambda x: self._check_existing_data(
                    data=x,
                    dbsession=base.session,
                    table_name='project_user',
                    check_column='EXISTS'),
                axis=1)  # get project user map
            project_user_data = project_user_data[project_user_data['EXISTS'] == False]  # filter existing project user
            project_user_data.drop('EXISTS', axis=1, inplace=True)  # remove extra column
        if len(project_data.index) > 0:  # store new projects
            pa1 = ProjectAdaptor(**{'session': base.session})  # connect to project adaptor
            pa1.store_project_and_attribute_data(data=project_data, autosave=False)  # load project data
        if len(user_data.index) > 0:  # store new users
            ua = UserAdaptor(**{'session': base.session})
            ua.store_user_data(data=user_data, autosave=False)  # load user data
        if len(project_user_data.index) > 0:  # store new project users
            pa2 = ProjectAdaptor(**{'session': base.session})  # connect to project adaptor
            project_user_data = project_user_data.to_dict(orient='records')  # convert dataframe to dictionary
            pa2.assign_user_to_project(data=project_user_data, autosave=False)  # load project user data
        if len(sample_data.index) > 0:  # store new samples
            sa = SampleAdaptor(**{'session': base.session})  # connect to sample adaptor
            sa.store_sample_and_attribute_data(data=sample_data, autosave=False)  # load samples data
        if self.setup_irods:
            user_data.apply(lambda x: self._setup_irods_account(data=x), axis=1)  # create irods account
        file_checksum = calculate_file_checksum(filepath=project_info_file)
        file_size = os.path.getsize(project_info_file)
        file_data = [
            {'file_path': project_info_file,
             'location': 'ORWELL',
             'md5': file_checksum,
             'size': file_size}]
        fa = FileAdaptor(**{'session': base.session})  # connect to file adaptor
        fa.store_file_data(data=file_data, autosave=False)
    except:
        if db_connected:
            base.rollback_session()  # rollback session
        raise
    else:
        if db_connected:
            base.commit_session()  # commit changes to db
            if len(user_data.index) > 0 and self.notify_user:
                user_data.apply(
                    lambda x: self._notify_about_new_user_account(x),
                    axis=1)  # send mail to new user with their password and forget it
    finally:
        if db_connected:
            base.close_session()  # close db connection
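# --- Input sketch (illustrative, not from the repo) ---
# _check_and_register_data() expects a dict of four record lists; the field
# names below follow the setUp/test data in this file, and the email address
# and file path are hypothetical placeholders:
data = {
    'project_data': [{'project_igf_id': 'IGFP0002_test_23-5-2017_rna'}],
    'user_data': [{'name': 'User2', 'email_id': 'user2@example.com'}],
    'project_user_data': [{'project_igf_id': 'IGFP0002_test_23-5-2017_rna',
                           'email_id': 'user2@example.com'}],
    'sample_data': [{'sample_igf_id': 'IGF00006',
                     'project_igf_id': 'IGFP0002_test_23-5-2017_rna'}]}
# self._check_and_register_data(data=data, project_info_file='/path/project_info.csv')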
def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    base.start_session()
    project_data = [
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X'}]
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)
    sample_data = [
        {'sample_igf_id': 'IGF00001',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'POLYA-RNA'},
        {'sample_igf_id': 'IGF00003',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'experiment_type': 'POLYA-RNA'},
        {'sample_igf_id': 'IGF00002',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc'}]
    sa = SampleAdaptor(**{'session': base.session})
    sa.store_sample_and_attribute_data(data=sample_data)
    experiment_data = [
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'sample_igf_id': 'IGF00001',
         'experiment_igf_id': 'IGF00001_HISEQ4000',
         'library_name': 'IGF00001'},
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'sample_igf_id': 'IGF00003',
         'experiment_igf_id': 'IGF00003_HISEQ4000',
         'library_name': 'IGF00001'},
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'sample_igf_id': 'IGF00002',
         'experiment_igf_id': 'IGF00002_HISEQ4000',
         'library_name': 'IGF00002'}]
    ea = ExperimentAdaptor(**{'session': base.session})
    ea.store_project_and_attribute_data(data=experiment_data)
    pipeline_data = [
        {"pipeline_name": "alignment",
         "pipeline_db": "sqlite:////data/aln.db",
         "pipeline_init_conf": {
             "input_dir": "data/fastq_dir/",
             "output_dir": "data"},
         "pipeline_run_conf": {
             "output_dir": "data"}}]
    pl = PipelineAdaptor(**{'session': base.session})
    pl.store_pipeline_data(data=pipeline_data)
    pipeline_seed_data = [
        {'pipeline_name': 'alignment',
         'seed_id': '1',
         'seed_table': 'experiment'}]
    pl.create_pipeline_seed(data=pipeline_seed_data)
    base.close_session()
def run(self):
    try:
        fastq_file = self.param_required('fastq_file')
        fastq_dir = self.param_required('fastq_dir')
        igf_session_class = self.param_required('igf_session_class')
        fastqc_exe = self.param_required('fastqc_exe')
        tag = self.param_required('tag')
        seqrun_igf_id = self.param_required('seqrun_igf_id')
        seqrun_date = self.param_required('seqrun_date')
        flowcell_id = self.param_required('flowcell_id')
        fastqc_options = self.param('fastqc_options')
        base_results_dir = self.param_required('base_results_dir')
        project_name = self.param_required('project_name')
        force_overwrite = self.param('force_overwrite')
        fastqc_dir_label = self.param('fastqc_dir_label')
        required_collection_table = self.param('required_collection_table')
        sample_name = self.param('sample_name')
        hpc_location = self.param('hpc_location')
        fastqc_collection_type = self.param('fastqc_collection_type')
        use_ephemeral_space = self.param('use_ephemeral_space')
        store_file = self.param('store_file')
        lane_index_info = os.path.basename(fastq_dir)  # get the lane and index length info
        fastq_file_label = os.path.basename(fastq_file).replace('.fastq.gz', '')
        collection_name = None
        collection_table = None
        if tag == 'known' and store_file:  # fetch sample name for known fastq, if its not defined
            base = BaseAdaptor(**{'session_class': igf_session_class})
            base.start_session()  # connect to db
            ca = CollectionAdaptor(**{'session': base.session})
            (collection_name, collection_table) = \
                ca.fetch_collection_name_and_table_from_file_path(
                    file_path=fastq_file)  # fetch collection name and table info
            if collection_table != required_collection_table:
                raise ValueError(
                    'Expected collection table {0} and got {1}, {2}'.format(
                        required_collection_table,
                        collection_table,
                        fastq_file))
            ra = RunAdaptor(**{'session': base.session})
            sample = ra.fetch_sample_info_for_run(run_igf_id=collection_name)
            sample_name = sample['sample_igf_id']
            base.close_session()
        fastqc_result_dir = \
            os.path.join(
                base_results_dir,
                project_name,
                seqrun_date,
                flowcell_id,
                lane_index_info,
                tag)  # result dir path is generic
        if sample_name is not None:
            fastqc_result_dir = \
                os.path.join(
                    fastqc_result_dir,
                    sample_name)  # add sample name to dir path if its available
        fastqc_result_dir = \
            os.path.join(
                fastqc_result_dir,
                fastq_file_label,
                fastqc_dir_label)  # keep multiple files under same dir
        if os.path.exists(fastqc_result_dir) and force_overwrite:
            remove_dir(fastqc_result_dir)  # remove existing output dir if force_overwrite is true
        if not os.path.exists(fastqc_result_dir):
            os.makedirs(fastqc_result_dir, mode=0o775)  # create output dir if its not present
        temp_work_dir = \
            get_temp_dir(use_ephemeral_space=use_ephemeral_space)  # get a temp work dir
        if not os.path.exists(fastq_file):
            raise IOError('fastq file {0} not readable'.format(fastq_file))  # raise if fastq file path is not readable
        fastqc_output = \
            os.path.join(
                temp_work_dir,
                fastq_file_label)
        os.mkdir(fastqc_output)  # create fastqc output dir
        fastqc_param = \
            self.format_tool_options(fastqc_options)  # format fastqc params
        fastqc_cmd = [
            fastqc_exe,
            '-o', fastqc_output,
            '-d', temp_work_dir]  # fastqc base parameters
        fastqc_cmd.extend(fastqc_param)  # add additional parameters
        fastqc_cmd.append(fastq_file)  # fastqc input file
        subprocess.check_call(' '.join(fastqc_cmd), shell=True)  # run fastqc
        fastqc_zip = None
        fastqc_html = None
        for root, _, files in os.walk(top=fastqc_output):
            for file in files:
                if fnmatch.fnmatch(file, '*.zip'):
                    input_fastqc_zip = os.path.join(root, file)
                    copy2(input_fastqc_zip, fastqc_result_dir)
                    fastqc_zip = os.path.join(fastqc_result_dir, file)
                if fnmatch.fnmatch(file, '*.html'):
                    input_fastqc_html = os.path.join(root, file)
                    copy2(input_fastqc_html, fastqc_result_dir)
                    fastqc_html = os.path.join(fastqc_result_dir, file)
        if fastqc_html is None or fastqc_zip is None:
            raise ValueError(
                'Missing required values, fastqc zip: {0}, fastqc html: {1}'.format(
                    fastqc_zip, fastqc_html))
        if tag == 'known' and store_file:
            if collection_name is None:
                raise ValueError(
                    'couldn\'t retrieve collection name for {0}'.format(fastq_file))
            fastqc_files = [
                {'name': collection_name,
                 'type': fastqc_collection_type,
                 'table': required_collection_table,
                 'file_path': fastqc_zip,
                 'location': hpc_location},
                {'name': collection_name,
                 'type': fastqc_collection_type,
                 'table': required_collection_table,
                 'file_path': fastqc_html,
                 'location': hpc_location}]
            ca = CollectionAdaptor(**{'session_class': igf_session_class})
            ca.start_session()
            ca.load_file_and_create_collection(data=fastqc_files)  # store fastqc files to db
            ca.close_session()
        self.param(
            'dataflow_params',
            {'fastqc_html': fastqc_html,
             'lane_index_info': lane_index_info,
             'sample_name': sample_name,
             'fastqc': {
                 'fastq_dir': fastq_dir,
                 'fastqc_zip': fastqc_zip,
                 'fastqc_html': fastqc_html}})  # set dataflow params
    except Exception as e:
        message = \
            'seqrun: {2}, Error in {0}: {1}'.format(
                self.__class__.__name__, e, seqrun_igf_id)
        self.warning(message)
        self.post_message_to_slack(message, reaction='fail')  # post msg to slack for failed jobs
        raise
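# --- Command sketch (illustrative) ---
# With fastqc_exe='fastqc' and no extra fastqc_options, the command list
# built in run() above joins to a shell call of the form:
#
#   fastqc -o <temp_work_dir>/<fastq_file_label> -d <temp_work_dir> <fastq_file>
#
# e.g. for /tmp/work/sampleA_R1.fastq.gz the label is 'sampleA_R1', FastQC
# writes sampleA_R1_fastqc.zip and sampleA_R1_fastqc.html into that output
# dir, and both are then copied to fastqc_result_dir.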
def setUp(self):
    self.path = 'data/seqrun_dir'
    self.dbconfig = 'data/dbconfig.json'
    self.md5_out_path = 'data/md5_dir'
    self.pipeline_name = 'demultiplexing_fastq'
    seqrun_json = 'data/seqrun_db_data.json'
    platform_json = 'data/platform_db_data.json'
    pipeline_json = 'data/pipeline_data.json'
    os.mkdir(self.md5_out_path)
    dbparam = None
    with open(self.dbconfig, 'r') as json_data:
        dbparam = json.load(json_data)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    self.pipeline_name = ''
    Base.metadata.create_all(self.engine)
    base.start_session()
    user_data = [
        {'name': 'user1',
         'email_id': '*****@*****.**',
         'username': '******'}]
    ua = UserAdaptor(**{'session': base.session})
    ua.store_user_data(data=user_data)
    project_data = [
        {'project_igf_id': 'project_1',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X'}]
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)
    project_user_data = [
        {'project_igf_id': 'project_1',
         'email_id': '*****@*****.**',
         'data_authority': True}]
    pa.assign_user_to_project(data=project_user_data)
    sample_data = [
        {'sample_igf_id': 'IGF0001', 'project_igf_id': 'project_1'},
        {'sample_igf_id': 'IGF0002', 'project_igf_id': 'project_1'},
        {'sample_igf_id': 'IGF0003', 'project_igf_id': 'project_1'}]
    sa = SampleAdaptor(**{'session': base.session})
    sa.store_sample_and_attribute_data(data=sample_data)
    base.commit_session()
    with open(pipeline_json, 'r') as json_data:  # store pipeline data to db
        pipeline_data = json.load(json_data)
        pa = PipelineAdaptor(**{'session': base.session})
        pa.store_pipeline_data(data=pipeline_data)
    with open(platform_json, 'r') as json_data:  # store platform data to db
        platform_data = json.load(json_data)
        pl = PlatformAdaptor(**{'session': base.session})
        pl.store_platform_data(data=platform_data)
    with open(seqrun_json, 'r') as json_data:  # store seqrun data to db
        seqrun_data = json.load(json_data)
        sra = SeqrunAdaptor(**{'session': base.session})
        sra.store_seqrun_and_attribute_data(data=seqrun_data)
    base.close_session()
def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.drop_all(self.engine)
    if os.path.exists(self.dbname):
        os.remove(self.dbname)
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    self.temp_work_dir = get_temp_dir()
    self.temp_base_dir = get_temp_dir()
    self.input_list = ['a.cram', 'a.vcf.gz', 'b.tar.gz']
    for file_name in self.input_list:
        file_path = os.path.join(self.temp_work_dir, file_name)
        with open(file_path, 'w') as fq:
            fq.write('AAAA')  # create input files
    base = BaseAdaptor(**{'session_class': self.session_class})
    base.start_session()
    platform_data = [
        {"platform_igf_id": "M001",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"}]  # platform data
    flowcell_rule_data = [
        {"platform_igf_id": "M001",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"}]  # flowcell rule data
    pl = PlatformAdaptor(**{'session': base.session})
    pl.store_platform_data(data=platform_data)  # loading platform data
    pl.store_flowcell_barcode_rule(data=flowcell_rule_data)  # loading flowcell rules data
    project_data = [{'project_igf_id': 'ProjectA'}]  # project data
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)  # load project data
    sample_data = [
        {'sample_igf_id': 'SampleA',
         'project_igf_id': 'ProjectA'}]  # sample data
    sa = SampleAdaptor(**{'session': base.session})
    sa.store_sample_and_attribute_data(data=sample_data)  # store sample data
    seqrun_data = [
        {'seqrun_igf_id': 'SeqrunA',
         'flowcell_id': '000000000-D0YLK',
         'platform_igf_id': 'M001',
         'flowcell': 'MISEQ'}]  # seqrun data
    sra = SeqrunAdaptor(**{'session': base.session})
    sra.store_seqrun_and_attribute_data(data=seqrun_data)  # load seqrun data
    experiment_data = [
        {'experiment_igf_id': 'ExperimentA',
         'sample_igf_id': 'SampleA',
         'library_name': 'SampleA',
         'platform_name': 'MISEQ',
         'project_igf_id': 'ProjectA'}]  # experiment data
    ea = ExperimentAdaptor(**{'session': base.session})
    ea.store_project_and_attribute_data(data=experiment_data)  # load experiment data
    run_data = [
        {'run_igf_id': 'RunA',
         'experiment_igf_id': 'ExperimentA',
         'seqrun_igf_id': 'SeqrunA',
         'lane_number': '1'}]  # run data
    ra = RunAdaptor(**{'session': base.session})
    ra.store_run_and_attribute_data(data=run_data)  # load run data
    base.commit_session()
    base.close_session()
def load_file_to_disk_and_db(self, input_file_list, withdraw_exisitng_collection=True,
                             autosave_db=True, file_suffix=None, force=True,
                             remove_file=False):
    '''
    A method for loading analysis results to disk and database. File will be
    moved to a new path if base_path is present. Directory structure of the
    final path is based on the collection_table information.

    Following will be the final directory structure if base_path is present

    project - base_path/project_igf_id/analysis_name
    sample - base_path/project_igf_id/sample_igf_id/analysis_name
    experiment - base_path/project_igf_id/sample_igf_id/experiment_igf_id/analysis_name
    run - base_path/project_igf_id/sample_igf_id/experiment_igf_id/run_igf_id/analysis_name

    :param input_file_list: A list of input files to load, all using the same collection info
    :param withdraw_exisitng_collection: Remove existing collection group, DO NOT use this
                                         while loading a list of files
    :param autosave_db: Save changes to database, default True
    :param file_suffix: Use a specific file suffix, use None if it should be same as the
                        original file, e.g. input.vcf.gz to output.vcf.gz
    :param force: Toggle for removing existing file, default True
    :param remove_file: A toggle for removing existing file from disk, default False
    :returns: A list of final filepaths
    '''
    try:
        project_igf_id = None
        sample_igf_id = None
        experiment_igf_id = None
        run_igf_id = None
        output_path_list = list()  # define empty output list
        dbconnected = False
        if self.collection_name is None or \
           self.collection_type is None or \
           self.collection_table is None:
            raise ValueError('File collection information is incomplete')  # check for collection information
        base = BaseAdaptor(**{'session_class': self.dbsession_class})
        base.start_session()  # connect to db
        dbconnected = True
        if self.base_path is not None:
            if self.collection_table == 'sample':
                sa = SampleAdaptor(**{'session': base.session})
                sample_igf_id = self.collection_name
                sample_exists = sa.check_sample_records_igf_id(sample_igf_id=sample_igf_id)
                if not sample_exists:
                    raise ValueError('Sample {0} not found in db'.format(sample_igf_id))
                project_igf_id = \
                    sa.fetch_sample_project(sample_igf_id=sample_igf_id)  # fetch project id for sample
            elif self.collection_table == 'experiment':
                ea = ExperimentAdaptor(**{'session': base.session})
                experiment_igf_id = self.collection_name
                experiment_exists = \
                    ea.check_experiment_records_id(experiment_igf_id=experiment_igf_id)
                if not experiment_exists:
                    raise ValueError('Experiment {0} not present in database'.format(experiment_igf_id))
                (project_igf_id, sample_igf_id) = \
                    ea.fetch_project_and_sample_for_experiment(
                        experiment_igf_id=experiment_igf_id)  # fetch project and sample id for experiment
            elif self.collection_table == 'run':
                ra = RunAdaptor(**{'session': base.session})
                run_igf_id = self.collection_name
                run_exists = ra.check_run_records_igf_id(run_igf_id=run_igf_id)
                if not run_exists:
                    raise ValueError('Run {0} not found in database'.format(run_igf_id))
                (project_igf_id, sample_igf_id, experiment_igf_id) = \
                    ra.fetch_project_sample_and_experiment_for_run(
                        run_igf_id=run_igf_id)  # fetch project, sample and experiment id for run
            elif self.collection_table == 'project':
                pa = ProjectAdaptor(**{'session': base.session})
                project_igf_id = self.collection_name
                project_exists = \
                    pa.check_project_records_igf_id(project_igf_id=project_igf_id)
                if not project_exists:
                    raise ValueError('Project {0} not found in database'.format(project_igf_id))
        if self.rename_file and self.analysis_name is None:
            raise ValueError('Analysis name is required for renaming file')  # check analysis name
        for input_file in input_file_list:
            final_path = ''
            if self.base_path is None:  # do not move file if base_path is absent
                final_path = os.path.dirname(input_file)
            else:  # move file path
                if self.collection_table == 'project':
                    if project_igf_id is None:
                        raise ValueError('Missing project id for collection {0}'.format(self.collection_name))
                    final_path = \
                        os.path.join(
                            self.base_path,
                            project_igf_id,
                            self.analysis_name)  # final path for project
                elif self.collection_table == 'sample':
                    if project_igf_id is None or \
                       sample_igf_id is None:
                        raise ValueError('Missing project and sample id for collection {0}'.format(self.collection_name))
                    final_path = \
                        os.path.join(
                            self.base_path,
                            project_igf_id,
                            sample_igf_id,
                            self.analysis_name)  # final path for sample
                elif self.collection_table == 'experiment':
                    if project_igf_id is None or \
                       sample_igf_id is None or \
                       experiment_igf_id is None:
                        raise ValueError('Missing project, sample and experiment id for collection {0}'.format(self.collection_name))
                    final_path = \
                        os.path.join(
                            self.base_path,
                            project_igf_id,
                            sample_igf_id,
                            experiment_igf_id,
                            self.analysis_name)  # final path for experiment
                elif self.collection_table == 'run':
                    if project_igf_id is None or \
                       sample_igf_id is None or \
                       experiment_igf_id is None or \
                       run_igf_id is None:
                        raise ValueError('Missing project, sample, experiment and run id for collection {0}'.format(self.collection_name))
                    final_path = \
                        os.path.join(
                            self.base_path,
                            project_igf_id,
                            sample_igf_id,
                            experiment_igf_id,
                            run_igf_id,
                            self.analysis_name)  # final path for run
            if self.rename_file:
                new_filename = \
                    self.get_new_file_name(
                        input_file=input_file,
                        file_suffix=file_suffix)
                final_path = \
                    os.path.join(
                        final_path,
                        new_filename)  # get new filepath
            else:
                final_path = \
                    os.path.join(
                        final_path,
                        os.path.basename(input_file))
            if final_path != input_file:  # move file if its required
                final_path = preprocess_path_name(input_path=final_path)  # remove unexpected characters from file path
                move_file(
                    source_path=input_file,
                    destinationa_path=final_path,
                    force=force)  # move or overwrite file to destination dir
            output_path_list.append(final_path)  # add final path to the output list
            self.create_or_update_analysis_collection(
                file_path=final_path,
                dbsession=base.session,
                withdraw_exisitng_collection=withdraw_exisitng_collection,
                remove_file=remove_file,
                autosave_db=autosave_db)  # load new file collection in db
            if autosave_db:
                base.commit_session()  # save changes to db for each file
        base.commit_session()  # save changes to db
        base.close_session()  # close db connection
        return output_path_list
    except:
        if dbconnected:
            base.rollback_session()
            base.close_session()
        raise
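# --- Usage sketch (illustrative, not from the repo) ---
# A minimal example of driving load_file_to_disk_and_db() for a 'run' level
# collection, mirroring the tests above; with base_path set and rename_file
# left at its default, the file should land under
# base_path/ProjectA/SampleA/ExperimentA/RunA/AnalysisA/.
au = Analysis_collection_utils(
    dbsession_class=session_class,           # an existing sqlalchemy session class
    analysis_name='AnalysisA',
    tag_name='TagA',
    collection_name='RunA',
    collection_type='AnalysisA_Files',
    collection_table='run',
    base_path='/path/to/analysis_results')   # hypothetical base path
final_paths = au.load_file_to_disk_and_db(
    input_file_list=['/tmp/work/a.cram'],    # hypothetical input file
    withdraw_exisitng_collection=False)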
def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    base.start_session()
    platform_data = [
        {"platform_igf_id": "M03291",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"},
        {"platform_igf_id": "NB501820",
         "model_name": "NEXTSEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"},
        {"platform_igf_id": "K00345",
         "model_name": "HISEQ4000",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"}]
    flowcell_rule_data = [
        {"platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 SR",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"},
        {"platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 PE",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"},
        {"platform_igf_id": "NB501820",
         "flowcell_type": "NEXTSEQ",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"},
        {"platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"}]
    pl = PlatformAdaptor(**{'session': base.session})
    pl.store_platform_data(data=platform_data)
    pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
    seqrun_data = [
        {'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'flowcell_id': '000000000-BRN47',
         'platform_igf_id': 'M03291',
         'flowcell': 'MISEQ'},
        {'seqrun_igf_id': '180416_NB03291_013_000000001-BRN47',
         'flowcell_id': '000000001-BRN47',
         'platform_igf_id': 'NB501820',
         'flowcell': 'NEXTSEQ'}]
    sra = SeqrunAdaptor(**{'session': base.session})
    sra.store_seqrun_and_attribute_data(data=seqrun_data)
    project_data = [{'project_igf_id': 'projectA'}]
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)
    sample_data = [
        {'sample_igf_id': 'sampleA',
         'project_igf_id': 'projectA',
         'species_name': 'HG38'},
        {'sample_igf_id': 'sampleB',
         'project_igf_id': 'projectA',
         'species_name': 'UNKNOWN'}]
    sa = SampleAdaptor(**{'session': base.session})
    sa.store_sample_and_attribute_data(data=sample_data)
    experiment_data = [
        {'project_igf_id': 'projectA',
         'sample_igf_id': 'sampleA',
         'experiment_igf_id': 'sampleA_MISEQ',
         'library_name': 'sampleA',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'TENX-TRANSCRIPTOME-3P',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ'},
        {'project_igf_id': 'projectA',
         'sample_igf_id': 'sampleA',
         'experiment_igf_id': 'sampleA_NEXTSEQ',
         'library_name': 'sampleA',
         'library_source': 'UNKNOWN',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'TENX-TRANSCRIPTOME-3P',
         'library_layout': 'PAIRED',
         'platform_name': 'NEXTSEQ'},
        {'project_igf_id': 'projectA',
         'sample_igf_id': 'sampleB',
         'experiment_igf_id': 'sampleB_MISEQ',
         'library_name': 'sampleB',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'TENX-TRANSCRIPTOME-3P',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ'}]
    ea = ExperimentAdaptor(**{'session': base.session})
    ea.store_project_and_attribute_data(data=experiment_data)
    run_data = [
        {'experiment_igf_id': 'sampleA_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'sampleA_MISEQ_000000000-BRN47_1',
         'lane_number': '1'},
        {'experiment_igf_id': 'sampleA_NEXTSEQ',
         'seqrun_igf_id': '180416_NB03291_013_000000001-BRN47',
         'run_igf_id': 'sampleA_NEXTSEQ_000000001-BRN47_2',
         'lane_number': '2'},
        {'experiment_igf_id': 'sampleB_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'sampleB_MISEQ_HVWN7BBXX_1',
         'lane_number': '1'}]
    ra = RunAdaptor(**{'session': base.session})
    ra.store_run_and_attribute_data(data=run_data)
    file_data = [
        {'file_path': '/path/sampleA_MISEQ_000000000-BRN47_1_R1.fastq.gz',
         'location': 'HPC_PROJECT',
         'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
         'size': '1528121404'},
        {'file_path': '/path/sampleA_NEXTSEQ_000000001-BRN47_2_R1.fastq.gz',
         'location': 'HPC_PROJECT',
         'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
         'size': '1528121404'},
        {'file_path': '/path/sampleB_MISEQ_HVWN7BBXX_1_R1.fastq.gz',
         'location': 'HPC_PROJECT',
         'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
         'size': '1528121404'}]
    fa = FileAdaptor(**{'session': base.session})
    fa.store_file_and_attribute_data(data=file_data)
    collection_data = [
        {'name': 'sampleA_MISEQ_000000000-BRN47_1',
         'type': 'demultiplexed_fastq',
         'table': 'run'},
        {'name': 'sampleA_NEXTSEQ_000000001-BRN47_2',
         'type': 'demultiplexed_fastq',
         'table': 'run'},
        {'name': 'sampleB_MISEQ_HVWN7BBXX_1',
         'type': 'demultiplexed_fastq',
         'table': 'run'}]
    collection_files_data = [
        {'name': 'sampleA_MISEQ_000000000-BRN47_1',
         'type': 'demultiplexed_fastq',
         'file_path': '/path/sampleA_MISEQ_000000000-BRN47_1_R1.fastq.gz'},
        {'name': 'sampleA_NEXTSEQ_000000001-BRN47_2',
         'type': 'demultiplexed_fastq',
         'file_path': '/path/sampleA_NEXTSEQ_000000001-BRN47_2_R1.fastq.gz'},
        {'name': 'sampleB_MISEQ_HVWN7BBXX_1',
         'type': 'demultiplexed_fastq',
         'file_path': '/path/sampleB_MISEQ_HVWN7BBXX_1_R1.fastq.gz'}]
    ca = CollectionAdaptor(**{'session': base.session})
    ca.store_collection_and_attribute_data(data=collection_data)
    ca.create_collection_group(data=collection_files_data)
    base.close_session()
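# A short sketch (an assumption, meant to run against the fixture loaded in
# setUp above) showing how the stored collection groups can be read back; the
# get_collection_files call and its arguments mirror the usage elsewhere in
# this document.

def example_fetch_demultiplexed_fastq(session_class):
    base = BaseAdaptor(**{'session_class': session_class})
    base.start_session()
    ca = CollectionAdaptor(**{'session': base.session})
    ca_files = ca.get_collection_files(
        collection_name='sampleA_MISEQ_000000000-BRN47_1',
        collection_type='demultiplexed_fastq',
        output_mode='dataframe')                                              # one row per file linked to the collection
    file_list = list(ca_files['file_path'].to_dict().values())
    base.close_session()
    return file_list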
def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    self.fastq_dir = 'data/collect_fastq_dir/sc_1_8'
    self.model_name = 'NEXTSEQ'
    self.flowcell_id = 'TESTABC'
    self.seqrun_igf_id = '171003_NB500000_0089_TESTABC'
    self.file_location = 'HPC_PROJECT'
    self.samplesheet_file = 'data/collect_fastq_dir/sc_1_8/SampleSheet.csv'
    self.samplesheet_filename = 'SampleSheet.csv'
    self.manifest_name = 'file_manifest.csv'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    base.start_session()
    platform_data = [
        {"platform_igf_id": "M00001",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"},
        {"platform_igf_id": "NB500000",
         "model_name": "NEXTSEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"},
        {"platform_igf_id": "K00000",
         "model_name": "HISEQ4000",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"}]
    flowcell_rule_data = [
        {"platform_igf_id": "K00000",
         "flowcell_type": "HiSeq 3000/4000 SR",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"},
        {"platform_igf_id": "K00000",
         "flowcell_type": "HiSeq 3000/4000 PE",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"},
        {"platform_igf_id": "NB500000",
         "flowcell_type": "NEXTSEQ",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"},
        {"platform_igf_id": "M00001",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"}]
    pl = PlatformAdaptor(**{'session': base.session})
    pl.store_platform_data(data=platform_data)
    pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
    project_data = [{
        'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
        'project_name': 'test_22-8-2017_rna',
        'description': 'Its project 1',
        'project_deadline': 'Before August 2017',
        'comments': 'Some samples are treated with drug X'}]
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)
    sample_data = [
        {'sample_igf_id': 'IGF00001',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc'},
        {'sample_igf_id': 'IGF00002',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc'}]
    sa = SampleAdaptor(**{'session': base.session})
    sa.store_sample_and_attribute_data(data=sample_data)
    seqrun_data = [{
        'seqrun_igf_id': '171003_NB500000_0089_TESTABC',
        'flowcell_id': 'TESTABC',
        'platform_igf_id': 'NB500000',
        'flowcell': 'NEXTSEQ'}]
    sra = SeqrunAdaptor(**{'session': base.session})
    sra.store_seqrun_and_attribute_data(data=seqrun_data)
    base.close_session()
def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    base.start_session()
    self.session_class = base.get_session_class()
    project_data = [{
        'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
        'project_name': 'test_22-8-2017_rna',
        'description': 'Its project 1',
        'project_deadline': 'Before August 2017',
        'comments': 'Some samples are treated with drug X'}]
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)
    sample_data = [
        {'sample_igf_id': 'IGF00001',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'POLYA-RNA'},
        {'sample_igf_id': 'IGF00003',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'experiment_type': 'POLYA-RNA'},
        {'sample_igf_id': 'IGF00002',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc'}]
    sa = SampleAdaptor(**{'session': base.session})
    sa.store_sample_and_attribute_data(data=sample_data)
    experiment_data = [
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'sample_igf_id': 'IGF00001',
         'experiment_igf_id': 'IGF00001_HISEQ4000',
         'library_name': 'IGF00001'},
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'sample_igf_id': 'IGF00003',
         'experiment_igf_id': 'IGF00003_HISEQ4000',
         'library_name': 'IGF00001'},
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'sample_igf_id': 'IGF00002',
         'experiment_igf_id': 'IGF00002_HISEQ4000',
         'library_name': 'IGF00002'}]
    ea = ExperimentAdaptor(**{'session': base.session})
    ea.store_project_and_attribute_data(data=experiment_data)
    base.close_session()
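# A short sketch (an assumption, meant to run against the fixture in setUp
# above) showing how an experiment is traversed back to its project and
# sample; fetch_project_and_sample_for_experiment is the same call used by
# load_file_to_disk_and_db for experiment-level collections.

def example_resolve_experiment(session_class):
    base = BaseAdaptor(**{'session_class': session_class})
    base.start_session()
    ea = ExperimentAdaptor(**{'session': base.session})
    (project_igf_id, sample_igf_id) = \
        ea.fetch_project_and_sample_for_experiment(
            experiment_igf_id='IGF00001_HISEQ4000')                           # experiment id from the fixture above
    base.close_session()
    return project_igf_id, sample_igf_id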