def test_process_project_data_and_account(self):
    '''
    Run the new-project registration against data/check_project_data and
    verify that the project, user, sample and project-user records exist.

    Slack logging, irods setup, user notification and the HPC user check are
    all switched off for the run.
    '''
    registrar = Find_and_register_new_project_data(
        projet_info_path=os.path.join('.', 'data/check_project_data'),
        dbconfig=self.dbconfig,
        user_account_template='template/email_notification/send_new_account_info.txt',
        log_slack=False,
        setup_irods=False,
        notify_user=False,
        check_hpc_user=False,
    )
    registrar.process_project_data_and_account()

    # open a fresh session on the same database to inspect what was stored
    with open(self.dbconfig, 'r') as config_handle:
        db_settings = json.load(config_handle)
    db = BaseAdaptor(**db_settings)
    db.start_session()

    new_project_id = 'IGFP0002_test_23-5-2017_rna'
    project_adaptor = ProjectAdaptor(session=db.session)
    self.assertTrue(
        project_adaptor.check_project_records_igf_id(
            project_igf_id=new_project_id))

    user_adaptor = UserAdaptor(session=db.session)
    self.assertTrue(
        user_adaptor.check_user_records_email_id(email_id='*****@*****.**'))
    registered_user = user_adaptor.fetch_user_records_email_id(
        user_email_id='*****@*****.**')
    self.assertEqual(registered_user.name, 'User2')

    sample_adaptor = SampleAdaptor(session=db.session)
    self.assertTrue(
        sample_adaptor.check_sample_records_igf_id(sample_igf_id='IGF00006'))

    # two project-user links are checked, one after the other
    for member_email in ('*****@*****.**', '*****@*****.**'):
        self.assertTrue(
            project_adaptor.check_existing_project_user(
                project_igf_id=new_project_id,
                email_id=member_email))

    db.close_session()
def setUp(self):
    '''
    Rebuild the test database from scratch and load one platform, flowcell
    rule, project, sample, seqrun and experiment record.
    '''
    # fixture records
    platform_records = [{
        "platform_igf_id": "M001",
        "model_name": "MISEQ",
        "vendor_name": "ILLUMINA",
        "software_name": "RTA",
        "software_version": "RTA1.18.54",
    }]
    flowcell_rule_records = [{
        "platform_igf_id": "M001",
        "flowcell_type": "MISEQ",
        "index_1": "NO_CHANGE",
        "index_2": "NO_CHANGE",
    }]
    project_records = [{'project_igf_id': 'ProjectA'}]
    sample_records = [{
        'sample_igf_id': 'SampleA',
        'project_igf_id': 'ProjectA',
    }]
    seqrun_records = [{
        'seqrun_igf_id': 'SeqrunA',
        'flowcell_id': '000000000-D0YLK',
        'platform_igf_id': 'M001',
        'flowcell': 'MISEQ',
    }]
    experiment_records = [{
        'experiment_igf_id': 'ExperimentA',
        'sample_igf_id': 'SampleA',
        'library_name': 'SampleA',
        'platform_name': 'MISEQ',
        'project_igf_id': 'ProjectA',
    }]

    # drop any previous schema / db file and create a clean one
    self.dbconfig = 'data/dbconfig.json'
    db_settings = read_dbconf_json(self.dbconfig)
    bootstrap = BaseAdaptor(**db_settings)
    self.engine = bootstrap.engine
    self.dbname = db_settings['dbname']
    Base.metadata.drop_all(self.engine)
    if os.path.exists(self.dbname):
        os.remove(self.dbname)
    Base.metadata.create_all(self.engine)
    self.session_class = bootstrap.get_session_class()

    # load everything through one shared session, committed once at the end
    db = BaseAdaptor(session_class=self.session_class)
    db.start_session()
    platform_adaptor = PlatformAdaptor(session=db.session)
    platform_adaptor.store_platform_data(data=platform_records)
    platform_adaptor.store_flowcell_barcode_rule(data=flowcell_rule_records)
    ProjectAdaptor(session=db.session).\
        store_project_and_attribute_data(data=project_records)
    SampleAdaptor(session=db.session).\
        store_sample_and_attribute_data(data=sample_records)
    SeqrunAdaptor(session=db.session).\
        store_seqrun_and_attribute_data(data=seqrun_records)
    ExperimentAdaptor(session=db.session).\
        store_project_and_attribute_data(data=experiment_records)
    db.commit_session()
    db.close_session()
def test_count_project_samples(self):
    '''
    Check count_project_samples for a project with samples, a project
    without samples, and with only_active switched off.
    '''
    # (keyword arguments, expected count), evaluated in this order
    cases = [
        ({'project_igf_id': 'IGFP0001_test_22-8-2017_rna'}, 3),
        ({'project_igf_id': 'IGFP0002_test_22-8-2017_rna'}, 0),
        ({'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
          'only_active': False}, 4),
    ]
    adaptor = ProjectAdaptor(session_class=self.session_class)
    adaptor.start_session()
    for call_kwargs, expected_count in cases:
        observed_count = adaptor.count_project_samples(**call_kwargs)
        self.assertEqual(observed_count, expected_count)
    adaptor.close_session()
def setUp(self):
    '''
    Create the schema and load two projects; the first project receives
    four samples, one of them with status FAILED.
    '''
    self.dbconfig = 'data/dbconfig.json'
    db_settings = read_dbconf_json(self.dbconfig)
    db = BaseAdaptor(**db_settings)
    self.engine = db.engine
    self.dbname = db_settings['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = db.get_session_class()

    first_project = 'IGFP0001_test_22-8-2017_rna'
    project_records = [
        {
            'project_igf_id': first_project,
            'project_name': 'test_22-8-2017_rna',
            'description': 'Its project 1',
            'project_deadline': 'Before August 2017',
            'comments': 'Some samples are treated with drug X',
        },
        {
            'project_igf_id': 'IGFP0002_test_22-8-2017_rna',
            'project_name': 'test_23-8-2017_rna',
            'description': 'Its project 2',
            'project_deadline': 'Before August 2017',
            'comments': 'Some samples are treated with drug X',
        },
    ]
    # three samples with no explicit status plus one marked FAILED
    sample_records = [
        {'sample_igf_id': sample_id, 'project_igf_id': first_project}
        for sample_id in ('IGFS001', 'IGFS002', 'IGFS003')
    ]
    sample_records.append({
        'sample_igf_id': 'IGFS004',
        'project_igf_id': first_project,
        'status': 'FAILED',
    })

    db.start_session()
    project_adaptor = ProjectAdaptor(session=db.session)
    project_adaptor.store_project_and_attribute_data(data=project_records)
    sample_adaptor = SampleAdaptor(session=db.session)
    sample_adaptor.store_sample_and_attribute_data(data=sample_records)
    db.close_session()
def test_check_data_authority_for_project(self):
    '''
    check_data_authority_for_project is expected to be truthy for the first
    project and falsy for the second one.
    '''
    adaptor = ProjectAdaptor(session_class=self.session_class)
    adaptor.start_session()
    with_authority = adaptor.check_data_authority_for_project(
        project_igf_id='IGFP0001_test_22-8-2017_rna')
    self.assertTrue(with_authority)
    without_authority = adaptor.check_data_authority_for_project(
        project_igf_id='IGFP0002_test_22-8-2017_rna')
    self.assertFalse(without_authority)
    adaptor.close_session()
def test_fetch_data_authority_for_project(self):
    '''
    fetch_data_authority_for_project is expected to return a user record
    for the first project and None for the second one.
    '''
    adaptor = ProjectAdaptor(session_class=self.session_class)
    adaptor.start_session()
    authority_user = adaptor.fetch_data_authority_for_project(
        project_igf_id='IGFP0001_test_22-8-2017_rna')
    self.assertEqual(authority_user.email_id, '*****@*****.**')
    missing_user = adaptor.fetch_data_authority_for_project(
        project_igf_id='IGFP0002_test_22-8-2017_rna')
    self.assertEqual(missing_user, None)
    adaptor.close_session()
def test_fetch_project_samples(self):
    '''
    Check the number of rows returned by fetch_project_samples in dataframe
    mode for both projects, and with only_active switched off.
    '''
    # (keyword arguments, expected number of rows), evaluated in this order
    cases = [
        ({'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
          'output_mode': 'dataframe'}, 3),
        ({'project_igf_id': 'IGFP0002_test_22-8-2017_rna',
          'output_mode': 'dataframe'}, 0),
        ({'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
          'only_active': False,
          'output_mode': 'dataframe'}, 4),
    ]
    adaptor = ProjectAdaptor(session_class=self.session_class)
    adaptor.start_session()
    for call_kwargs, expected_rows in cases:
        sample_frame = adaptor.fetch_project_samples(**call_kwargs)
        self.assertEqual(len(sample_frame.index), expected_rows)
    adaptor.close_session()
def test_fetch_all_project_igf_ids(self):
    '''
    fetch_all_project_igf_ids is expected to list exactly the two projects
    loaded by the fixture in its project_igf_id column.
    '''
    adaptor = ProjectAdaptor(session_class=self.session_class)
    adaptor.start_session()
    project_frame = adaptor.fetch_all_project_igf_ids()
    adaptor.close_session()  # assertions run after the session is closed

    for expected_id in ('IGFP0002_test_22-8-2017_rna',
                        'IGFP0001_test_22-8-2017_rna'):
        self.assertTrue(
            expected_id in project_frame['project_igf_id'].values)
    self.assertEqual(len(project_frame['project_igf_id'].values), 2)
def setUp(self):
    '''
    Rebuild the test database, load one project / sample / experiment and
    register two temp files as an 'AnalysisA_html' collection of the
    experiment.
    '''
    self.dbconfig = 'data/dbconfig.json'
    db_settings = read_dbconf_json(self.dbconfig)
    db = BaseAdaptor(**db_settings)
    self.engine = db.engine
    self.dbname = db_settings['dbname']
    Base.metadata.drop_all(self.engine)
    if os.path.exists(self.dbname):
        os.remove(self.dbname)
    Base.metadata.create_all(self.engine)
    self.session_class = db.get_session_class()

    db.start_session()
    project_adaptor = ProjectAdaptor(session=db.session)
    project_adaptor.store_project_and_attribute_data(
        data=[{'project_igf_id': 'ProjectA'}])
    sample_adaptor = SampleAdaptor(session=db.session)
    sample_adaptor.store_sample_and_attribute_data(
        data=[{'sample_igf_id': 'SampleA',
               'project_igf_id': 'ProjectA'}])
    experiment_adaptor = ExperimentAdaptor(session=db.session)
    experiment_adaptor.store_project_and_attribute_data(
        data=[{'experiment_igf_id': 'ExperimentA',
               'sample_igf_id': 'SampleA',
               'library_name': 'SampleA',
               'platform_name': 'MISEQ',
               'project_igf_id': 'ProjectA'}])

    # write the placeholder files, then describe them as collection entries
    self.temp_dir = get_temp_dir()
    collection_records = []
    for file_name in ['a.csv', 'b.csv']:
        with open(os.path.join(self.temp_dir, file_name), 'w') as handle:
            handle.write('A')
    for file_name in ['a.csv', 'b.csv']:
        collection_records.append({
            'name': 'ExperimentA',
            'type': 'AnalysisA_html',
            'table': 'experiment',
            'file_path': os.path.join(self.temp_dir, file_name),
        })
    collection_adaptor = CollectionAdaptor(session=db.session)
    collection_adaptor.load_file_and_create_collection(
        data=collection_records,
        calculate_file_size_and_md5=False)
    db.close_session()
def run(self):
    '''
    A ehive runnable method for uploading analysis files to irods server

    All parameters are read with ``param_required``:

    :param project_igf_id: Name of the project
    :param igf_session_class: A db session class used to look up the project's data authority user
    :param irods_exe_dir: Irods executable directory
    :param file_list: A list of file paths to upload to irods
    :param analysis_name: A string for analysis name
    :param dir_path_list: A list of directory structure for irods server
    :param file_tag: A text string for adding tag to collection
    :raises ValueError: if the project has no data authority user
    :raises IOError: if any path in file_list does not exist

    Any failure is reported through ``self.warning`` and Slack before being
    re-raised.
    '''
    # Bound before the try block so the error handler can always build its
    # message, even when the very first parameter lookup fails.
    project_igf_id = None
    try:
        project_igf_id = self.param_required('project_igf_id')
        igf_session_class = self.param_required('igf_session_class')
        irods_exe_dir = self.param_required('irods_exe_dir')
        file_list = self.param_required('file_list')
        analysis_name = self.param_required('analysis_name')
        dir_path_list = self.param_required('dir_path_list')
        file_tag = self.param_required('file_tag')

        pa = ProjectAdaptor(**{'session_class': igf_session_class})
        pa.start_session()
        try:
            user = \
                pa.fetch_data_authority_for_project(
                    project_igf_id=project_igf_id)                      # fetch user info from db
        finally:
            pa.close_session()                                          # never leave the session open on failure

        if user is None:
            raise ValueError(
                'No user found for project {0}'.format(project_igf_id))

        username = user.username                                        # get username for irods
        irods_upload = IGF_irods_uploader(irods_exe_dir)                # create instance for irods upload
        for file_path in file_list:                                     # validate every path before uploading anything
            if not os.path.exists(file_path):
                raise IOError(
                    'Failed to find file {0} for irods upload'.
                    format(file_path))

        irods_upload.\
            upload_analysis_results_and_create_collection(
                file_list=file_list,
                irods_user=username,
                project_name=project_igf_id,
                analysis_name=analysis_name,
                dir_path_list=dir_path_list,
                file_tag=file_tag)                                      # upload analysis results to irods and build collection
    except Exception as e:
        message = \
            'project: {2}, Error in {0}: {1}'.format(
                self.__class__.__name__,
                e,
                project_igf_id)
        self.warning(message)
        self.post_message_to_slack(message, reaction='fail')            # post msg to slack for failed jobs
        raise
def setUp(self):
    '''
    Create the schema, load two users, one project with a data authority
    user and five samples, then write a CSV describing two new projects to
    data/check_project_data/new_project_data.csv.
    '''
    self.dbconfig = 'data/dbconfig.json'
    self.new_project_data = 'data/check_project_data/new_project_data.csv'
    with open(self.dbconfig, 'r') as config_handle:
        db_settings = json.load(config_handle)
    db = BaseAdaptor(**db_settings)
    self.engine = db.engine
    self.dbname = db_settings['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = db.session_class

    existing_project = 'IGFP0001_test_22-8-2017_rna'
    db.start_session()

    user_adaptor = UserAdaptor(session=db.session)
    user_adaptor.store_user_data(data=[
        {'name': 'user1', 'email_id': '*****@*****.**', 'username': '******'},
        {'name': 'igf', 'email_id': '*****@*****.**', 'username': '******'},
    ])

    project_adaptor = ProjectAdaptor(session=db.session)
    project_adaptor.store_project_and_attribute_data(data=[{
        'project_igf_id': existing_project,
        'project_name': 'test_22-8-2017_rna',
        'description': 'Its project 1',
        'project_deadline': 'Before August 2017',
        'comments': 'Some samples are treated with drug X',
    }])
    project_adaptor.assign_user_to_project(data=[{
        'project_igf_id': existing_project,
        'email_id': '*****@*****.**',
        'data_authority': True,
    }])

    # samples IGF00001 .. IGF00005, all attached to the existing project
    sample_records = [
        {'sample_igf_id': 'IGF0000{0}'.format(serial),
         'project_igf_id': existing_project}
        for serial in range(1, 6)
    ]
    sample_adaptor = SampleAdaptor(session=db.session)
    sample_adaptor.store_sample_and_attribute_data(data=sample_records)
    db.close_session()

    # input file for the registration step: two projects not yet in the db
    incoming_records = [
        {
            'project_igf_id': 'IGFP0002_test_23-5-2017_rna',
            'name': 'user2',
            'email_id': '*****@*****.**',
            'sample_igf_id': 'IGF00006',
        },
        {
            'project_igf_id': 'IGFP0003_test_24-8-2017_rna',
            'name': 'user2',
            'email_id': '*****@*****.**',
            'sample_igf_id': 'IGF00007',
            'barcode_check': 'OFF',
        },
    ]
    csv_path = os.path.join('.', self.new_project_data)
    pd.DataFrame(incoming_records).to_csv(csv_path)
def get_project_read_count(project_igf_id, session_class,
                           run_attribute_name='R1_READ_COUNT',
                           active_status='ACTIVE'):
    '''
    A utility method for fetching sample read counts for an input project_igf_id

    :param project_igf_id: A project_igf_id string
    :param session_class: A db session class object
    :param run_attribute_name: Attribute name from Run_attribute table for read count lookup
    :param active_status: text label for active runs, default ACTIVE
    :returns: A pandas dataframe containing following columns

                 project_igf_id
                 sample_igf_id
                 flowcell_id
                 attribute_value

              An empty dataframe (without columns) is returned when no
              record matches.
    '''
    pr = ProjectAdaptor(**{'session_class': session_class})
    pr.start_session()
    try:
        # The join conditions already tie the tables together, so only the
        # value filters are needed here.
        query = \
            pr.session.\
            query(Project.project_igf_id,
                  Sample.sample_igf_id,
                  Seqrun.flowcell_id,
                  Run_attribute.attribute_value).\
            join(Sample, Project.project_id == Sample.project_id).\
            join(Experiment, Sample.sample_id == Experiment.sample_id).\
            join(Run, Experiment.experiment_id == Run.experiment_id).\
            join(Seqrun, Seqrun.seqrun_id == Run.seqrun_id).\
            join(Run_attribute, Run.run_id == Run_attribute.run_id).\
            filter(Project.project_igf_id == project_igf_id).\
            filter(Run_attribute.attribute_name == run_attribute_name).\
            filter(Run.status == active_status).\
            filter(Experiment.status == active_status).\
            filter(Sample.status == active_status)
        results = pr.fetch_records(query=query)
    finally:
        pr.close_session()                                              # release the session even when the query fails

    if len(results.index) > 0:
        return results
    return pd.DataFrame()
def get_seqrun_info_for_project(project_igf_id, session_class):
    '''
    A utility method for fetching seqrun_igf_id and flowcell_id which are linked
    to a specific project_igf_id

    :param project_igf_id: A project_igf_id string
    :param session_class: A db session class object
    :returns: A pandas dataframe containing following columns

                 seqrun_igf_id
                 flowcell_id

              An empty dataframe (without columns) is returned when no
              record matches.
    '''
    pr = ProjectAdaptor(**{'session_class': session_class})
    pr.start_session()
    try:
        # The join conditions already tie the tables together, so the
        # project id is the only filter needed.
        query = \
            pr.session.\
            query(distinct(Seqrun.seqrun_igf_id).label('seqrun_igf_id'),
                  Seqrun.flowcell_id).\
            join(Run, Seqrun.seqrun_id == Run.seqrun_id).\
            join(Experiment, Experiment.experiment_id == Run.experiment_id).\
            join(Sample, Sample.sample_id == Experiment.sample_id).\
            join(Project, Project.project_id == Sample.project_id).\
            filter(Project.project_igf_id == project_igf_id)
        results = pr.fetch_records(query=query)
    finally:
        pr.close_session()                                              # release the session even when the query fails

    if len(results.index) > 0:
        return results
    return pd.DataFrame()
def setUp(self):
    '''
    Create the schema and load one user and two projects; the user is
    linked to both projects but flagged as data authority only for the
    first one.
    '''
    self.dbconfig = 'data/dbconfig.json'
    db_settings = read_dbconf_json(self.dbconfig)
    db = BaseAdaptor(**db_settings)
    self.engine = db.engine
    self.dbname = db_settings['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = db.get_session_class()

    first_project = 'IGFP0001_test_22-8-2017_rna'
    second_project = 'IGFP0002_test_22-8-2017_rna'
    project_records = [
        {
            'project_igf_id': first_project,
            'project_name': 'test_22-8-2017_rna',
            'description': 'Its project 1',
            'project_deadline': 'Before August 2017',
            'comments': 'Some samples are treated with drug X',
        },
        {
            'project_igf_id': second_project,
            'project_name': 'test_23-8-2017_rna',
            'description': 'Its project 2',
            'project_deadline': 'Before August 2017',
            'comments': 'Some samples are treated with drug X',
        },
    ]
    user_records = [{
        'name': 'UserA',
        'email_id': '*****@*****.**',
        'username': '******',
    }]
    # the second link deliberately carries no data_authority key
    membership_records = [
        {
            'project_igf_id': first_project,
            'email_id': '*****@*****.**',
            'data_authority': True,
        },
        {
            'project_igf_id': second_project,
            'email_id': '*****@*****.**',
        },
    ]

    db.start_session()
    user_adaptor = UserAdaptor(session=db.session)
    user_adaptor.store_user_data(data=user_records)
    project_adaptor = ProjectAdaptor(session=db.session)
    project_adaptor.store_project_and_attribute_data(data=project_records)
    project_adaptor.assign_user_to_project(data=membership_records)
    db.close_session()
def run(self):
    '''
    A runnable method for packing the fastq files of a project run into a
    tar file and uploading it to irods

    Required parameters (read with ``param_required``):

    :param fastq_dir: Directory that is walked for fastq and report files
    :param seqrun_igf_id: Seqrun id; the text before its first underscore is parsed as a %y%m%d date
    :param project_name: Project id, used for the data authority lookup and for file names
    :param igf_session_class: A db session class object
    :param irods_exe_dir: Irods executable directory
    :param flowcell_id: Flowcell id passed to the irods uploader
    :param manifest_name: File name of the manifest to include in the tar

    Parameters read with ``param``:

    :param samplesheet_filename: File name of the samplesheet to include in the tar
    :param report_html: fnmatch pattern for the report file path
    :param use_ephemeral_space: Passed through to get_temp_dir
    :raises ValueError: if the project has no data authority user

    Samplesheet, report and manifest files are copied to a temp dir under a
    '<project>_<fastq dir name>_<date>_<file name>' name before being added
    to the tar. Fastq files are added relative to fastq_dir, skipping
    'Undetermined_*' files. Any failure is reported through ``self.warning``
    and Slack before being re-raised.
    '''
    # Bound before the try block so the error handler can always build its
    # message, even when an earlier parameter lookup fails.
    seqrun_igf_id = None
    try:
        fastq_dir = self.param_required('fastq_dir')
        seqrun_igf_id = self.param_required('seqrun_igf_id')
        project_name = self.param_required('project_name')
        igf_session_class = self.param_required('igf_session_class')
        irods_exe_dir = self.param_required('irods_exe_dir')
        flowcell_id = self.param_required('flowcell_id')
        samplesheet_filename = self.param('samplesheet_filename')
        manifest_name = self.param_required('manifest_name')
        report_html = self.param('report_html')
        use_ephemeral_space = self.param('use_ephemeral_space')

        pa = ProjectAdaptor(**{'session_class': igf_session_class})
        pa.start_session()
        try:
            user = \
                pa.fetch_data_authority_for_project(
                    project_igf_id=project_name)                        # fetch user info from db
        finally:
            pa.close_session()                                          # never leave the session open on failure

        if user is None:
            raise ValueError(
                'No user found for project {0}'.format(project_name))

        username = user.username                                        # get username for irods
        # NOTE(review): report_html is read with self.param but used here
        # unconditionally; presumably it is always configured - confirm.
        report_htmlname = os.path.basename(report_html)
        seqrun_date = seqrun_igf_id.split('_')[0]                       # collect seqrun date from igf id
        seqrun_date = \
            datetime.datetime.strptime(seqrun_date, '%y%m%d').date()    # identify actual date
        seqrun_date = str(seqrun_date)                                  # convert object to string
        irods_upload = IGF_irods_uploader(irods_exe_dir)                # create instance for irods upload
        base_seq_dir = os.path.basename(fastq_dir)                      # get base name for the source dir
        tarfile_name = \
            '{0}_{1}_{2}.tar'.format(
                project_name,
                base_seq_dir,
                seqrun_date)                                            # construct name of the tarfile
        temp_work_dir = \
            get_temp_dir(use_ephemeral_space=use_ephemeral_space)       # get a temp dir
        tarfile_name = \
            os.path.join(temp_work_dir, tarfile_name)                   # create tarfile in the temp dir

        with tarfile.open(tarfile_name, "w") as tar:

            def add_renamed_copy(source_path):
                # copy a file to the temp dir under the run specific name
                # and add that copy to the tar, relative to the temp dir
                renamed_path = \
                    os.path.join(
                        temp_work_dir,
                        '{0}_{1}_{2}_{3}'.format(
                            project_name,
                            base_seq_dir,
                            seqrun_date,
                            os.path.basename(source_path)))
                copy2(source_path, renamed_path)
                tar.add(
                    renamed_path,
                    arcname=os.path.relpath(
                        renamed_path,
                        start=temp_work_dir))

            for root, _, files in os.walk(top=fastq_dir):
                abs_root = os.path.abspath(root)
                if samplesheet_filename in files:
                    add_renamed_copy(
                        os.path.join(abs_root, samplesheet_filename))   # add samplesheet file to tar

                if report_htmlname in files:
                    for file in files:
                        if fnmatch.fnmatch(
                                os.path.join(root, file), report_html):
                            add_renamed_copy(
                                os.path.join(abs_root, file))           # add demultiplexing report to tar

                if manifest_name in files:
                    add_renamed_copy(
                        os.path.join(abs_root, manifest_name))          # add manifest file to tar

                for file in files:
                    if fnmatch.fnmatch(file, '*.fastq.gz') and \
                       not fnmatch.fnmatch(file, 'Undetermined_*'):
                        fastq_file_path = os.path.join(abs_root, file)  # get filepath for the fastq files
                        tar.add(
                            fastq_file_path,
                            arcname=os.path.relpath(
                                fastq_file_path,
                                start=fastq_dir))                       # add fastq file to tar

        irods_upload.\
            upload_fastqfile_and_create_collection(
                filepath=tarfile_name,
                irods_user=username,
                project_name=project_name,
                run_igf_id=seqrun_igf_id,
                flowcell_id=flowcell_id,
                run_date=seqrun_date)                                   # upload fastq data to irods
        remove_dir(temp_work_dir)                                       # remove temp dir once data upload is done
    except Exception as e:
        message = \
            'seqrun: {2}, Error in {0}: {1}'.format(
                self.__class__.__name__,
                e,
                seqrun_igf_id)
        self.warning(message)
        self.post_message_to_slack(message, reaction='fail')            # post msg to slack for failed jobs
        raise
def test_mark_project_barcode_check_off(self):
    '''
    Exercise mark_project_barcode_check_off for three projects, annotated
    in the calls below as: no attribute record, barcode check ON, and
    barcode check OFF.
    '''
    adaptor = ProjectAdaptor(session_class=self.session_class)

    def assert_barcode_check(project_igf_id, expected_value):
        # every barcode_check attribute row of the project must match
        attribute_frame = adaptor.get_project_attributes(
            project_igf_id=project_igf_id,
            attribute_name='barcode_check')
        for attribute_row in attribute_frame.to_dict(orient='records'):
            self.assertEqual(
                attribute_row['attribute_value'], expected_value)

    adaptor.start_session()
    adaptor.store_project_and_attribute_data(self.data)
    adaptor.close_session()

    # no attribute record
    mark_project_barcode_check_off(
        project_igf_id='IGFP001_test1_24-1-18',
        session_class=self.session_class)
    adaptor.start_session()
    attribute_present = adaptor.check_project_attributes(
        project_igf_id='IGFP001_test1_24-1-18',
        attribute_name='barcode_check')
    self.assertTrue(attribute_present)
    assert_barcode_check('IGFP001_test1_24-1-18', 'OFF')
    assert_barcode_check('IGFP002_test1_24-1-18', 'ON')
    adaptor.close_session()

    # barcode check ON
    mark_project_barcode_check_off(
        project_igf_id='IGFP002_test1_24-1-18',
        session_class=self.session_class)
    adaptor.start_session()
    assert_barcode_check('IGFP002_test1_24-1-18', 'OFF')
    assert_barcode_check('IGFP003_test1_24-1-18', 'OFF')
    adaptor.close_session()

    # barcode check OFF
    mark_project_barcode_check_off(
        project_igf_id='IGFP003_test1_24-1-18',
        session_class=self.session_class)
    adaptor.start_session()
    assert_barcode_check('IGFP003_test1_24-1-18', 'OFF')
    adaptor.close_session()
'run_igf_id': 'RunB', 'experiment_igf_id': 'ExperimentA', 'seqrun_igf_id': '180610_K00345_0063_AHWL7CBBXX', 'lane_number': '1' }, { 'run_igf_id': 'RunC', 'experiment_igf_id': 'ExperimentA', 'seqrun_igf_id': '180410_K00345_0063_AHWL7CBBXX', 'lane_number': '1' }] # run data base.start_session() pl = PlatformAdaptor(**{'session': base.session}) pl.store_platform_data(data=platform_data) # loading platform data pl.store_flowcell_barcode_rule( data=flowcell_rule_data) # loading flowcell rules data pa = ProjectAdaptor(**{'session': base.session}) pa.store_project_and_attribute_data(data=project_data) # load project data sa = SampleAdaptor(**{'session': base.session}) sa.store_sample_and_attribute_data(data=sample_data) # store sample data sra = SeqrunAdaptor(**{'session': base.session}) sra.store_seqrun_and_attribute_data(data=seqrun_data) # load seqrun data ea = ExperimentAdaptor(**{'session': base.session}) ea.store_project_and_attribute_data( data=experiment_data) # load experiment data ra = RunAdaptor(**{'session': base.session}) ra.store_run_and_attribute_data(data=run_data) # load run data pipeline_data = [{ "pipeline_name": "DemultiplexIlluminaFastq", "pipeline_db": "sqlite:////bcl2fastq.db", }]
def mark_project_barcode_check_off(project_igf_id, session_class,
                                   barcode_check_attribute='barcode_check',
                                   barcode_check_val='OFF'):
    '''
    A utility method for marking project barcode check as off using the project_igf_id

    An existing attribute record is updated in place; a missing one is
    inserted. Both paths are committed together, rolled back on any error,
    and the session is always closed once it has been opened.

    :param project_igf_id: A project_igf_id string
    :param session_class: A db session class object
    :param barcode_check_attribute: A text keyword for barcode check attribute, default barcode_check
    :param barcode_check_val: A text for barcode check attribute value, default is 'OFF'
    '''
    session_open = False
    try:
        adaptor = ProjectAdaptor(session_class=session_class)
        adaptor.start_session()
        session_open = True
        has_attribute = \
            adaptor.check_project_attributes(
                project_igf_id=project_igf_id,
                attribute_name=barcode_check_attribute)
        if not has_attribute:
            # nothing stored yet: insert the attribute without auto commit
            new_records = [{
                'project_igf_id': project_igf_id,
                'attribute_name': barcode_check_attribute,
                'attribute_value': barcode_check_val,
            }]
            adaptor.store_project_attributes(new_records, autosave=False)
        else:
            # attribute exists: overwrite its value for this project
            project = \
                adaptor.fetch_project_records_igf_id(
                    project_igf_id=project_igf_id)
            matching_attributes = \
                adaptor.session.\
                query(Project_attribute).\
                filter(Project_attribute.attribute_name == barcode_check_attribute).\
                filter(Project_attribute.project_id == project.project_id)
            matching_attributes.update(
                {Project_attribute.attribute_value: barcode_check_val},
                synchronize_session=False)
        adaptor.commit_session()
    except BaseException:
        # same reach as a bare except: roll back on any failure, then re-raise
        if session_open:
            adaptor.rollback_session()
        raise
    finally:
        if session_open:
            adaptor.close_session()
def setUp(self):
    '''
    Create the schema and load platforms, flowcell rules, one project,
    seven samples, one seqrun, six experiments and six runs (each run with
    an R1_READ_COUNT value). Every table is loaded through its own adaptor
    session.
    '''
    self.dbconfig = 'data/dbconfig.json'
    db_settings = read_dbconf_json(self.dbconfig)
    db = BaseAdaptor(**db_settings)
    self.engine = db.engine
    self.dbname = db_settings['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = db.get_session_class()

    project_id = 'IGFQ000472_avik_28-3-2018_RNA'
    seqrun_id = '180518_K00345_0047_BHV2GJBBXX'

    # load platform data
    platform_records = [
        {"platform_igf_id": "M03291",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"},
        {"platform_igf_id": "NB501820",
         "model_name": "NEXTSEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"},
        {"platform_igf_id": "K00345",
         "model_name": "HISEQ4000",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"},
    ]
    flowcell_rule_records = [
        {"platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 SR",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"},
        {"platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 PE",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"},
        {"platform_igf_id": "NB501820",
         "flowcell_type": "NEXTSEQ",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"},
        {"platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"},
    ]
    platform_adaptor = PlatformAdaptor(session_class=db.session_class)
    platform_adaptor.start_session()
    platform_adaptor.store_platform_data(data=platform_records)
    platform_adaptor.store_flowcell_barcode_rule(data=flowcell_rule_records)
    platform_adaptor.close_session()

    # load project data
    project_adaptor = ProjectAdaptor(session_class=db.session_class)
    project_adaptor.start_session()
    project_adaptor.store_project_and_attribute_data(
        data=[{'project_igf_id': project_id}])
    project_adaptor.close_session()

    # load samples
    sample_ids = [
        'IGF109792', 'IGF109793', 'IGF109794', 'IGF109795',
        'IGF109796', 'IGF109797', 'IGF109797_1',
    ]
    sample_records = [
        {'project_igf_id': project_id,
         'sample_igf_id': sample_id,
         'expected_read': 40000000}
        for sample_id in sample_ids
    ]
    sample_adaptor = SampleAdaptor(session_class=db.session_class)
    sample_adaptor.start_session()
    sample_adaptor.store_sample_and_attribute_data(data=sample_records)
    sample_adaptor.close_session()

    # load seqrun data
    seqrun_records = [{
        'flowcell_id': 'HV2GJBBXX',
        'platform_igf_id': 'K00345',
        'seqrun_igf_id': seqrun_id,
    }]
    seqrun_adaptor = SeqrunAdaptor(session_class=db.session_class)
    seqrun_adaptor.start_session()
    seqrun_adaptor.store_seqrun_and_attribute_data(data=seqrun_records)
    seqrun_adaptor.close_session()

    # load experiment data (the last sample, IGF109797_1, has no experiment)
    experiment_records = [
        {'experiment_igf_id': '{0}_HISEQ4000'.format(sample_id),
         'library_name': sample_id,
         'platform_name': 'HISEQ4000',
         'project_igf_id': project_id,
         'sample_igf_id': sample_id}
        for sample_id in sample_ids[:6]
    ]
    experiment_adaptor = ExperimentAdaptor(session_class=db.session_class)
    experiment_adaptor.start_session()
    experiment_adaptor.store_project_and_attribute_data(
        data=experiment_records)
    experiment_adaptor.close_session()

    # load run data: (experiment id, run id, R1 read count)
    # NOTE(review): the run id for IGF109796 starts with a space in the
    # original fixture; it is kept verbatim here.
    run_specs = [
        ('IGF109792_HISEQ4000', 'IGF109792_HISEQ4000_H2N3MBBXY_7', 288046541),
        ('IGF109793_HISEQ4000', 'IGF109793_HISEQ4000_H2N3MBBXY_7', 14666330),
        ('IGF109794_HISEQ4000', 'IGF109794_HISEQ4000_H2N3MBBXY_7', 5009143),
        ('IGF109795_HISEQ4000', 'IGF109795_HISEQ4000_H2N3MBBXY_7', 1391747),
        ('IGF109796_HISEQ4000', ' IGF109796_HISEQ4000_H2N3MBBXY_7', 1318008),
        ('IGF109797_HISEQ4000', 'IGF109797_HISEQ4000_H2N3MBBXY_7', 1216324),
    ]
    run_records = [
        {'experiment_igf_id': experiment_id,
         'lane_number': '7',
         'run_igf_id': run_id,
         'seqrun_igf_id': seqrun_id,
         'R1_READ_COUNT': read_count}
        for experiment_id, run_id, read_count in run_specs
    ]
    run_adaptor = RunAdaptor(session_class=db.session_class)
    run_adaptor.start_session()
    run_adaptor.store_run_and_attribute_data(data=run_records)
    run_adaptor.close_session()
def setUp(self):
    '''
    Create the schema and load one project with three samples (two of them
    carrying TRANSCRIPTOMIC_SINGLE_CELL library metadata) and one
    experiment per sample.
    '''
    self.dbconfig = 'data/dbconfig.json'
    db_settings = read_dbconf_json(self.dbconfig)
    db = BaseAdaptor(**db_settings)
    self.engine = db.engine
    self.dbname = db_settings['dbname']
    Base.metadata.create_all(self.engine)
    db.start_session()
    self.session_class = db.get_session_class()

    project_id = 'IGFP0001_test_22-8-2017_rna_sc'
    project_adaptor = ProjectAdaptor(session=db.session)
    project_adaptor.store_project_and_attribute_data(data=[{
        'project_igf_id': project_id,
        'project_name': 'test_22-8-2017_rna',
        'description': 'Its project 1',
        'project_deadline': 'Before August 2017',
        'comments': 'Some samples are treated with drug X',
    }])

    # IGF00003 has no library_strategy and IGF00002 no library metadata at all
    sample_records = [
        {
            'sample_igf_id': 'IGF00001',
            'project_igf_id': project_id,
            'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
            'library_strategy': 'RNA-SEQ',
            'experiment_type': 'POLYA-RNA',
        },
        {
            'sample_igf_id': 'IGF00003',
            'project_igf_id': project_id,
            'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
            'experiment_type': 'POLYA-RNA',
        },
        {
            'sample_igf_id': 'IGF00002',
            'project_igf_id': project_id,
        },
    ]
    sample_adaptor = SampleAdaptor(session=db.session)
    sample_adaptor.store_sample_and_attribute_data(data=sample_records)

    # (sample id, library name); IGF00003 uses library name 'IGF00001'
    experiment_specs = [
        ('IGF00001', 'IGF00001'),
        ('IGF00003', 'IGF00001'),
        ('IGF00002', 'IGF00002'),
    ]
    experiment_records = [
        {
            'project_igf_id': project_id,
            'sample_igf_id': sample_id,
            'experiment_igf_id': '{0}_HISEQ4000'.format(sample_id),
            'library_name': library_name,
        }
        for sample_id, library_name in experiment_specs
    ]
    experiment_adaptor = ExperimentAdaptor(session=db.session)
    experiment_adaptor.store_project_and_attribute_data(
        data=experiment_records)
    db.close_session()
def setUp(self):
    '''
    Rebuild the database from scratch, create three small input files in a
    temp dir and seed platform, project, sample, seqrun, experiment and run
    records.
    '''
    self.dbconfig = 'data/dbconfig.json'
    db_settings = read_dbconf_json(self.dbconfig)
    bootstrap = BaseAdaptor(**db_settings)
    self.engine = bootstrap.engine
    self.dbname = db_settings['dbname']

    # start from a clean database
    Base.metadata.drop_all(self.engine)
    if os.path.exists(self.dbname):
        os.remove(self.dbname)
    Base.metadata.create_all(self.engine)
    self.session_class = bootstrap.get_session_class()

    # create input files
    self.temp_work_dir = get_temp_dir()
    self.temp_base_dir = get_temp_dir()
    self.input_list = ['a.cram', 'a.vcf.gz', 'b.tar.gz']
    for input_name in self.input_list:
        with open(os.path.join(self.temp_work_dir, input_name), 'w') as handle:
            handle.write('AAAA')

    base = BaseAdaptor(session_class=self.session_class)
    base.start_session()

    # platform and flowcell rule data
    platform_rows = [{
        "platform_igf_id": "M001",
        "model_name": "MISEQ",
        "vendor_name": "ILLUMINA",
        "software_name": "RTA",
        "software_version": "RTA1.18.54",
    }]
    flowcell_rule_rows = [{
        "platform_igf_id": "M001",
        "flowcell_type": "MISEQ",
        "index_1": "NO_CHANGE",
        "index_2": "NO_CHANGE",
    }]
    platform_adaptor = PlatformAdaptor(session=base.session)
    platform_adaptor.store_platform_data(data=platform_rows)
    platform_adaptor.store_flowcell_barcode_rule(data=flowcell_rule_rows)

    # project data
    project_adaptor = ProjectAdaptor(session=base.session)
    project_adaptor.store_project_and_attribute_data(
        data=[{'project_igf_id': 'ProjectA'}])

    # sample data
    sample_adaptor = SampleAdaptor(session=base.session)
    sample_adaptor.store_sample_and_attribute_data(
        data=[{'sample_igf_id': 'SampleA', 'project_igf_id': 'ProjectA'}])

    # seqrun data
    seqrun_rows = [{
        'seqrun_igf_id': 'SeqrunA',
        'flowcell_id': '000000000-D0YLK',
        'platform_igf_id': 'M001',
        'flowcell': 'MISEQ',
    }]
    seqrun_adaptor = SeqrunAdaptor(session=base.session)
    seqrun_adaptor.store_seqrun_and_attribute_data(data=seqrun_rows)

    # experiment data
    experiment_rows = [{
        'experiment_igf_id': 'ExperimentA',
        'sample_igf_id': 'SampleA',
        'library_name': 'SampleA',
        'platform_name': 'MISEQ',
        'project_igf_id': 'ProjectA',
    }]
    experiment_adaptor = ExperimentAdaptor(session=base.session)
    experiment_adaptor.store_project_and_attribute_data(data=experiment_rows)

    # run data
    run_rows = [{
        'run_igf_id': 'RunA',
        'experiment_igf_id': 'ExperimentA',
        'seqrun_igf_id': 'SeqrunA',
        'lane_number': '1',
    }]
    run_adaptor = RunAdaptor(session=base.session)
    run_adaptor.store_run_and_attribute_data(data=run_rows)

    base.commit_session()
    base.close_session()
def setUp(self):
    '''
    Create the database schema and seed a project, its samples and
    experiments, an alignment pipeline and one pipeline seed.
    '''
    self.dbconfig = 'data/dbconfig.json'
    conf = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**conf)
    self.engine = base.engine
    self.dbname = conf['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    base.start_session()

    project_id = 'IGFP0001_test_22-8-2017_rna_sc'

    # project
    projects = [{
        'project_igf_id': project_id,
        'project_name': 'test_22-8-2017_rna',
        'description': 'Its project 1',
        'project_deadline': 'Before August 2017',
        'comments': 'Some samples are treated with drug X',
    }]
    ProjectAdaptor(session=base.session).\
        store_project_and_attribute_data(data=projects)

    # samples
    samples = [
        {
            'sample_igf_id': 'IGF00001',
            'project_igf_id': project_id,
            'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
            'library_strategy': 'RNA-SEQ',
            'experiment_type': 'POLYA-RNA',
        },
        {
            'sample_igf_id': 'IGF00003',
            'project_igf_id': project_id,
            'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
            'experiment_type': 'POLYA-RNA',
        },
        {
            'sample_igf_id': 'IGF00002',
            'project_igf_id': project_id,
        },
    ]
    SampleAdaptor(session=base.session).\
        store_sample_and_attribute_data(data=samples)

    # experiments
    experiments = [
        {
            'project_igf_id': project_id,
            'sample_igf_id': 'IGF00001',
            'experiment_igf_id': 'IGF00001_HISEQ4000',
            'library_name': 'IGF00001',
        },
        {
            'project_igf_id': project_id,
            'sample_igf_id': 'IGF00003',
            'experiment_igf_id': 'IGF00003_HISEQ4000',
            'library_name': 'IGF00001',
        },
        {
            'project_igf_id': project_id,
            'sample_igf_id': 'IGF00002',
            'experiment_igf_id': 'IGF00002_HISEQ4000',
            'library_name': 'IGF00002',
        },
    ]
    ExperimentAdaptor(session=base.session).\
        store_project_and_attribute_data(data=experiments)

    # pipeline and its seed; the same adaptor is used for both calls
    pipelines = [{
        "pipeline_name": "alignment",
        "pipeline_db": "sqlite:////data/aln.db",
        "pipeline_init_conf": {
            "input_dir": "data/fastq_dir/",
            "output_dir": "data",
        },
        "pipeline_run_conf": {
            "output_dir": "data",
        },
    }]
    pipeline_adaptor = PipelineAdaptor(session=base.session)
    pipeline_adaptor.store_pipeline_data(data=pipelines)
    seeds = [{
        'pipeline_name': 'alignment',
        'seed_id': '1',
        'seed_table': 'experiment',
    }]
    pipeline_adaptor.create_pipeline_seed(data=seeds)

    base.close_session()
def setUp(self):
    '''
    Create the database schema and seed a MiSeq platform, one project and
    sample, a seqrun, two pipelines, three fastq file records grouped in two
    run collections, two experiments and their runs.
    '''
    self.dbconfig = 'data/dbconfig.json'
    db_settings = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**db_settings)
    self.engine = base.engine
    self.dbname = db_settings['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    base.start_session()

    # identifiers shared by several records below
    project_id = 'IGFQ000123_avik_10-4-2018_Miseq'
    sample_id = 'IGF103923'
    seqrun_id = '180416_M03291_0139_000000000-BRN47'
    run_one = 'IGF103923_MISEQ_000000000-BRN47_1'
    run_two = 'IGF103923_MISEQ1_000000000-BRN47_1'
    fastq_s1_r1 = '/path/S20180405S_S1_L001_R1_001.fastq.gz'
    fastq_s1_r2 = '/path/S20180405S_S1_L001_R2_001.fastq.gz'
    fastq_s3_r2 = '/path/S20180405S_S3_L001_R2_001.fastq.gz'

    # platform and flowcell barcode rule
    platform_rows = [{
        "platform_igf_id": "M03291",
        "model_name": "MISEQ",
        "vendor_name": "ILLUMINA",
        "software_name": "RTA",
        "software_version": "RTA1.18.54",
    }]
    flowcell_rule_rows = [{
        "platform_igf_id": "M03291",
        "flowcell_type": "MISEQ",
        "index_1": "NO_CHANGE",
        "index_2": "NO_CHANGE",
    }]
    platform_adaptor = PlatformAdaptor(session=base.session)
    platform_adaptor.store_platform_data(data=platform_rows)
    platform_adaptor.store_flowcell_barcode_rule(data=flowcell_rule_rows)

    # project
    project_adaptor = ProjectAdaptor(session=base.session)
    project_adaptor.store_project_and_attribute_data(
        data=[{'project_igf_id': project_id}])

    # sample
    sample_rows = [{
        'sample_igf_id': sample_id,
        'project_igf_id': project_id,
        'species_name': 'HG38',
    }]
    sample_adaptor = SampleAdaptor(session=base.session)
    sample_adaptor.store_sample_and_attribute_data(data=sample_rows)

    # seqrun
    seqrun_rows = [{
        'seqrun_igf_id': seqrun_id,
        'flowcell_id': '000000000-BRN47',
        'platform_igf_id': 'M03291',
        'flowcell': 'MISEQ',
    }]
    seqrun_adaptor = SeqrunAdaptor(session=base.session)
    seqrun_adaptor.store_seqrun_and_attribute_data(data=seqrun_rows)

    # pipelines
    pipeline_rows = [
        {"pipeline_name": pipeline_name,
         "pipeline_db": "sqlite:////bcl2fastq.db"}
        for pipeline_name in ("PrimaryAnalysis", "DemultiplexIlluminaFastq")
    ]
    pipeline_adaptor = PipelineAdaptor(session=base.session)
    pipeline_adaptor.store_pipeline_data(data=pipeline_rows)

    # fastq file records as (path, size) pairs; all share location and md5
    file_rows = [
        {
            'file_path': fastq_path,
            'location': 'HPC_PROJECT',
            'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
            'size': fastq_size,
        }
        for fastq_path, fastq_size in (
            (fastq_s1_r1, '1528121404'),
            (fastq_s1_r2, '1467047580'),
            (fastq_s3_r2, '1467047580'),
        )
    ]
    file_adaptor = FileAdaptor(session=base.session)
    file_adaptor.store_file_and_attribute_data(data=file_rows)

    # collections and their file groups
    collection_rows = [
        {'name': collection_name,
         'type': 'demultiplexed_fastq',
         'table': 'run'}
        for collection_name in (run_one, run_two)
    ]
    collection_file_rows = [
        {'name': collection_name,
         'type': 'demultiplexed_fastq',
         'file_path': fastq_path}
        for collection_name, fastq_path in (
            (run_one, fastq_s1_r1),
            (run_one, fastq_s1_r2),
            (run_two, fastq_s3_r2),
        )
    ]
    collection_adaptor = CollectionAdaptor(session=base.session)
    collection_adaptor.store_collection_and_attribute_data(data=collection_rows)
    collection_adaptor.create_collection_group(data=collection_file_rows)

    # experiments
    experiment_rows = [
        {
            'project_igf_id': project_id,
            'sample_igf_id': sample_id,
            'experiment_igf_id': 'IGF103923_MISEQ',
            'library_name': 'IGF103923',
            'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
            'library_strategy': 'RNA-SEQ',
            'experiment_type': 'TENX-TRANSCRIPTOME-3P',
            'library_layout': 'PAIRED',
            'platform_name': 'MISEQ',
        },
        {
            'project_igf_id': project_id,
            'sample_igf_id': sample_id,
            'experiment_igf_id': 'IGF103923_MISEQ1',
            'library_name': 'IGF103923_1',
            'library_source': 'GENOMIC_SINGLE_CELL',
            'library_strategy': 'WGS',
            'experiment_type': 'UNKNOWN',
            'library_layout': 'PAIRED',
            'platform_name': 'MISEQ',
        },
    ]
    experiment_adaptor = ExperimentAdaptor(session=base.session)
    experiment_adaptor.store_project_and_attribute_data(data=experiment_rows)

    # runs
    run_rows = [
        {
            'experiment_igf_id': 'IGF103923_MISEQ',
            'seqrun_igf_id': seqrun_id,
            'run_igf_id': run_one,
            'lane_number': '1',
        },
        {
            'experiment_igf_id': 'IGF103923_MISEQ1',
            'seqrun_igf_id': seqrun_id,
            'run_igf_id': run_two,
            'lane_number': '1',
        },
    ]
    run_adaptor = RunAdaptor(session=base.session)
    run_adaptor.store_run_and_attribute_data(data=run_rows)

    base.close_session()
def _check_existing_data(self, data, dbsession, table_name, check_column='EXISTS'): ''' An internal function for checking and registering project info :param data: A pandas data series :param dbsession: A sqlalchemy database session object :param table_name: A database table name :param check_column: Column name for existing data ''' try: if not isinstance(data, pd.Series): raise ValueError('Expecting a data series and got {0}'.format( type(data))) if table_name == 'project': if self.project_lookup_column in data and \ not pd.isnull(data[self.project_lookup_column]): project_igf_id = data[self.project_lookup_column] pa = ProjectAdaptor(**{'session': dbsession }) # connect to project adaptor project_exists = pa.check_project_records_igf_id( project_igf_id) if project_exists: # store data only if project is not existing data[check_column] = True else: data[check_column] = False return data else: raise ValueError('Missing or empty required column {0}'.\ format(self.project_lookup_column)) elif table_name == 'user': if self.user_lookup_column in data and \ not pd.isnull(data[self.user_lookup_column]): user_email = data[self.user_lookup_column] ua = UserAdaptor(**{'session': dbsession}) # connect to user adaptor user_exists = ua.check_user_records_email_id( email_id=user_email) if user_exists: # store data only if user is not existing data[check_column] = True else: data[check_column] = False return data else: raise ValueError('Missing or empty required column {0}'.\ format(self.user_lookup_column)) elif table_name == 'sample': if self.sample_lookup_column in data and \ not pd.isnull(data[self.sample_lookup_column]): project_igf_id = data[self.project_lookup_column] sample_igf_id = data[self.sample_lookup_column] sa = SampleAdaptor(**{'session': dbsession }) # connect to sample adaptor sample_project_exists=sa.check_project_and_sample(project_igf_id=project_igf_id,\ sample_igf_id=sample_igf_id) # check for existing sample_id and project-id combination if 
sample_project_exists: # store data only if sample is not existing data[check_column] = True else: sample_exists = sa.check_sample_records_igf_id( sample_igf_id) # check for existing sample if sample_exists: raise ValueError('Sample {0} exists in database but not associated with project {1}'.\ format(sample_igf_id,project_igf_id)) # inconsistency in sample project combination data[check_column] = False return data else: raise ValueError('Missing or empty required column {0}'.\ format(self.sample_lookup_column)) elif table_name == 'project_user': if self.user_lookup_column in data and \ not pd.isnull(data[self.user_lookup_column]) and \ self.project_lookup_column in data and \ not pd.isnull(data[self.project_lookup_column]): project_igf_id = data[self.project_lookup_column] user_email = data[self.user_lookup_column] pa = ProjectAdaptor(**{'session': dbsession }) # connect to project adaptor project_user_exists=pa.check_existing_project_user(project_igf_id,\ email_id=user_email) if user_email != self.default_user_email and \ (self.data_authority_column not in data or \ pd.isnull(data[self.data_authority_column])): data[ self. data_authority_column] = True # set user as data authority, filter default user if project_user_exists: # store data only if sample is not existing data[check_column] = True else: data[check_column] = False return data else: raise ValueError('Missing or empty required column {0}, {1}'.\ format(self.project_lookup_column,\ self.user_lookup_column)) else: raise ValueError('table {0} not supported'.format(table_name)) except: raise
def _check_and_register_data(self, data, project_info_file):
    '''
    An internal method for checking and registering data

    Records already present in db are filtered out, the remaining project,
    user, project-user and sample records are stored, and the project info
    file itself is registered as a file record. All changes are made in a
    single db session which is committed only if every step succeeds and is
    rolled back otherwise.

    :param data: A dictionary containing following keys

                   * project_data
                   * user_data
                   * project_user_data
                   * sample_data

    :param project_info_file: A filepath for project info
    :returns: None
    '''
    db_connected = False
    try:
        project_data = pd.DataFrame(data['project_data'])
        user_data = pd.DataFrame(data['user_data'])
        project_user_data = pd.DataFrame(data['project_user_data'])
        sample_data = pd.DataFrame(data['sample_data'])
        base = BaseAdaptor(**{'session_class': self.session_class})
        base.start_session()                                          # connect_to db
        db_connected = True

        def _keep_new_records(frame, table_name):
            # flag each row via _check_existing_data, keep only rows which
            # are not in db yet and remove the temporary flag column
            frame = \
                frame.apply(
                    lambda x: self._check_existing_data(
                        data=x,
                        dbsession=base.session,
                        table_name=table_name,
                        check_column='EXISTS'),
                    axis=1)
            frame = frame[frame['EXISTS'] == False]
            return frame.drop('EXISTS', axis=1)                       # new frame, no in-place edit of a slice

        project_data = \
            project_data[project_data[self.project_lookup_column].notnull()]
        project_data = project_data.drop_duplicates()
        if project_data.index.size > 0:
            project_data = \
                _keep_new_records(project_data, 'project')            # filter existing projects

        user_data = user_data[user_data[self.user_lookup_column].notnull()]
        user_data = user_data.drop_duplicates()
        if user_data.index.size > 0:
            user_data = \
                user_data.apply(
                    lambda x: self._assign_username_and_password(x),
                    axis=1)                                           # check for use account and password
            user_data = _keep_new_records(user_data, 'user')          # filter existing users

        sample_data = \
            sample_data[sample_data[self.sample_lookup_column].notnull()]
        sample_data = sample_data.drop_duplicates()
        if sample_data.index.size > 0:
            sample_data = \
                _keep_new_records(sample_data, 'sample')              # filter existing samples

        project_user_data = project_user_data.drop_duplicates()
        project_user_data_mask = \
            project_user_data[self.project_lookup_column].notnull() & \
            project_user_data[self.user_lookup_column].notnull()
        project_user_data = \
            project_user_data[project_user_data_mask]                 # not allowing any empty values for project or user lookup
        if project_user_data.index.size > 0:
            project_user_data = \
                self._add_default_user_to_project(project_user_data)  # update project_user_data with default users
            project_user_data = \
                _keep_new_records(project_user_data, 'project_user')  # filter existing project user

        if len(project_data.index) > 0:                               # store new projects
            pa1 = ProjectAdaptor(**{'session': base.session})
            pa1.store_project_and_attribute_data(
                data=project_data,
                autosave=False)

        if len(user_data.index) > 0:                                  # store new users
            ua = UserAdaptor(**{'session': base.session})
            ua.store_user_data(data=user_data, autosave=False)

        if len(project_user_data.index) > 0:                          # store new project users
            pa2 = ProjectAdaptor(**{'session': base.session})
            project_user_data = \
                project_user_data.to_dict(orient='records')           # convert dataframe to dictionary
            pa2.assign_user_to_project(
                data=project_user_data,
                autosave=False)

        if len(sample_data.index) > 0:                                # store new samples
            sa = SampleAdaptor(**{'session': base.session})
            sa.store_sample_and_attribute_data(
                data=sample_data,
                autosave=False)

        if self.setup_irods and len(user_data.index) > 0:
            # Guarded like the notification step below: on an empty frame
            # pandas may still probe the callable with an empty Series, and
            # there is no new user to create an account for anyway.
            user_data.apply(
                lambda x: self._setup_irods_account(data=x),
                axis=1)                                               # create irods account

        file_checksum = calculate_file_checksum(filepath=project_info_file)
        file_size = os.path.getsize(project_info_file)
        file_data = [{
            'file_path': project_info_file,
            'location': 'ORWELL',
            'md5': file_checksum,
            'size': file_size,
        }]
        fa = FileAdaptor(**{'session': base.session})                 # connect to file adaptor
        fa.store_file_data(data=file_data, autosave=False)
    except BaseException:
        # same scope as the previous bare except: undo everything on any failure
        if db_connected:
            base.rollback_session()                                   # rollback session
        raise
    else:
        if db_connected:
            base.commit_session()                                     # commit changes to db
            if len(user_data.index) > 0 and self.notify_user:
                user_data.apply(
                    lambda x: self._notify_about_new_user_account(x),
                    axis=1)                                           # send mail to new user with their password and forget it
    finally:
        if db_connected:
            base.close_session()                                      # close db connection
def setUp(self):
    '''
    Create the md5 output dir and database schema, then seed a user, a
    project with its user link and samples, and load pipeline, platform and
    seqrun records from the json fixture files.
    '''
    self.path = 'data/seqrun_dir'
    self.dbconfig = 'data/dbconfig.json'
    self.md5_out_path = 'data/md5_dir'
    self.pipeline_name = 'demultiplexing_fastq'
    seqrun_json = 'data/seqrun_db_data.json'
    platform_json = 'data/platform_db_data.json'
    pipeline_json = 'data/pipeline_data.json'
    os.mkdir(self.md5_out_path)

    def _read_json(json_path):
        # load one json fixture file
        with open(json_path, 'r') as json_handle:
            return json.load(json_handle)

    db_settings = _read_json(self.dbconfig)
    base = BaseAdaptor(**db_settings)
    self.engine = base.engine
    self.dbname = db_settings['dbname']
    # NOTE(review): this overwrites the 'demultiplexing_fastq' value set above - confirm which one is intended
    self.pipeline_name = ''
    Base.metadata.create_all(self.engine)
    base.start_session()

    # user
    user_rows = [{
        'name': 'user1',
        'email_id': '*****@*****.**',
        'username': '******',
    }]
    user_adaptor = UserAdaptor(session=base.session)
    user_adaptor.store_user_data(data=user_rows)

    # project and its data authority user
    project_rows = [{
        'project_igf_id': 'project_1',
        'project_name': 'test_22-8-2017_rna',
        'description': 'Its project 1',
        'project_deadline': 'Before August 2017',
        'comments': 'Some samples are treated with drug X',
    }]
    project_adaptor = ProjectAdaptor(session=base.session)
    project_adaptor.store_project_and_attribute_data(data=project_rows)
    project_user_rows = [{
        'project_igf_id': 'project_1',
        'email_id': '*****@*****.**',
        'data_authority': True,
    }]
    project_adaptor.assign_user_to_project(data=project_user_rows)

    # samples
    sample_rows = [
        {'sample_igf_id': sample_id, 'project_igf_id': 'project_1'}
        for sample_id in ('IGF0001', 'IGF0002', 'IGF0003')
    ]
    sample_adaptor = SampleAdaptor(session=base.session)
    sample_adaptor.store_sample_and_attribute_data(data=sample_rows)
    base.commit_session()

    # store pipeline data to db
    pipeline_adaptor = PipelineAdaptor(session=base.session)
    pipeline_adaptor.store_pipeline_data(data=_read_json(pipeline_json))

    # store platform data to db
    platform_adaptor = PlatformAdaptor(session=base.session)
    platform_adaptor.store_platform_data(data=_read_json(platform_json))

    # store seqrun data to db
    seqrun_adaptor = SeqrunAdaptor(session=base.session)
    seqrun_adaptor.store_seqrun_and_attribute_data(data=_read_json(seqrun_json))

    base.close_session()
def setUp(self):
    '''
    Set the fastq collection test attributes, create the database schema and
    seed three platforms with their flowcell rules, one project, two samples
    and one NextSeq seqrun.
    '''
    self.dbconfig = 'data/dbconfig.json'
    self.fastq_dir = 'data/collect_fastq_dir/sc_1_8'
    self.model_name = 'NEXTSEQ'
    self.flowcell_id = 'TESTABC'
    self.seqrun_igf_id = '171003_NB500000_0089_TESTABC'
    self.file_location = 'HPC_PROJECT'
    self.samplesheet_file = 'data/collect_fastq_dir/sc_1_8/SampleSheet.csv'
    self.samplesheet_filename = 'SampleSheet.csv'
    self.manifest_name = 'file_manifest.csv'

    db_settings = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**db_settings)
    self.engine = base.engine
    self.dbname = db_settings['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.session_class
    base.start_session()

    # platforms as (platform id, model, software version); vendor and software name are shared
    platform_rows = [
        {
            "platform_igf_id": platform_id,
            "model_name": model,
            "vendor_name": "ILLUMINA",
            "software_name": "RTA",
            "software_version": software_version,
        }
        for platform_id, model, software_version in (
            ("M00001", "MISEQ", "RTA1.18.54"),
            ("NB500000", "NEXTSEQ", "RTA2"),
            ("K00000", "HISEQ4000", "RTA2"),
        )
    ]
    # flowcell rules as (platform id, flowcell type, index_2 rule); index_1 is never changed
    flowcell_rule_rows = [
        {
            "platform_igf_id": platform_id,
            "flowcell_type": flowcell_type,
            "index_1": "NO_CHANGE",
            "index_2": index_2_rule,
        }
        for platform_id, flowcell_type, index_2_rule in (
            ("K00000", "HiSeq 3000/4000 SR", "NO_CHANGE"),
            ("K00000", "HiSeq 3000/4000 PE", "REVCOMP"),
            ("NB500000", "NEXTSEQ", "REVCOMP"),
            ("M00001", "MISEQ", "NO_CHANGE"),
        )
    ]
    platform_adaptor = PlatformAdaptor(session=base.session)
    platform_adaptor.store_platform_data(data=platform_rows)
    platform_adaptor.store_flowcell_barcode_rule(data=flowcell_rule_rows)

    # project
    project_rows = [{
        'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
        'project_name': 'test_22-8-2017_rna',
        'description': 'Its project 1',
        'project_deadline': 'Before August 2017',
        'comments': 'Some samples are treated with drug X',
    }]
    project_adaptor = ProjectAdaptor(session=base.session)
    project_adaptor.store_project_and_attribute_data(data=project_rows)

    # samples
    sample_rows = [
        {
            'sample_igf_id': sample_id,
            'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
        }
        for sample_id in ('IGF00001', 'IGF00002')
    ]
    sample_adaptor = SampleAdaptor(session=base.session)
    sample_adaptor.store_sample_and_attribute_data(data=sample_rows)

    # seqrun
    seqrun_rows = [{
        'seqrun_igf_id': '171003_NB500000_0089_TESTABC',
        'flowcell_id': 'TESTABC',
        'platform_igf_id': 'NB500000',
        'flowcell': 'NEXTSEQ',
    }]
    seqrun_adaptor = SeqrunAdaptor(session=base.session)
    seqrun_adaptor.store_seqrun_and_attribute_data(data=seqrun_rows)

    base.close_session()
def load_file_to_disk_and_db(self, input_file_list, withdraw_exisitng_collection=True,
                             autosave_db=True, file_suffix=None, force=True,
                             remove_file=False):
    '''
    A method for loading analysis results to disk and database. File will be
    moved to a new path if base_path is present. Directory structure of the
    final path is based on the collection_table information.

    Following will be the final directory structure if base_path is present

    project - base_path/project_igf_id/analysis_name
    sample - base_path/project_igf_id/sample_igf_id/analysis_name
    experiment - base_path/project_igf_id/sample_igf_id/experiment_igf_id/analysis_name
    run - base_path/project_igf_id/sample_igf_id/experiment_igf_id/run_igf_id/analysis_name

    :param input_file_list: A list of input file to load, all using the same
                            collection info
    :param withdraw_exisitng_collection: Remove existing collection group,
                                         DO NOT use this while loading a list of files
    :param autosave_db: Save changes to database after each file, default True
    :param file_suffix: Use a specific file suffix, use None if it should be
                        same as original file e.g. input.vcf.gz to output.vcf.gz
    :param force: Passed to move_file as its force option, default True
    :param remove_file: Passed to create_or_update_analysis_collection as its
                        remove_file option, default False
    :returns: A list of final filepath
    :raises ValueError: If collection information is incomplete, if
                        collection_table is not one of project, sample,
                        experiment or run while base_path is set, if the
                        collection record is not found in db, if analysis
                        name is missing while renaming files, or if any id
                        required for the final path is missing
    '''
    dbconnected = False
    try:
        project_igf_id = None
        sample_igf_id = None
        experiment_igf_id = None
        run_igf_id = None
        output_path_list = list()                                     # define empty output list
        if self.collection_name is None or \
           self.collection_type is None or \
           self.collection_table is None:
            raise ValueError('File collection information is incomplete')  # check for collection information

        if self.base_path is not None and \
           self.collection_table not in ('project', 'sample', 'experiment', 'run'):
            # Without this check an unknown table falls through every branch
            # below and the file is moved to a bare relative file name.
            raise ValueError('Collection table {0} is not supported for loading files under base_path'.\
                             format(self.collection_table))

        base = BaseAdaptor(**{'session_class': self.dbsession_class})
        base.start_session()                                          # connect to db
        dbconnected = True
        if self.base_path is not None:
            if self.collection_table == 'sample':
                sa = SampleAdaptor(**{'session': base.session})
                sample_igf_id = self.collection_name
                sample_exists = \
                    sa.check_sample_records_igf_id(
                        sample_igf_id=sample_igf_id)
                if not sample_exists:
                    raise ValueError('Sample {0} not found in db'.\
                                     format(sample_igf_id))
                project_igf_id = \
                    sa.fetch_sample_project(
                        sample_igf_id=sample_igf_id)                  # fetch project id for sample
            elif self.collection_table == 'experiment':
                ea = ExperimentAdaptor(**{'session': base.session})
                experiment_igf_id = self.collection_name
                experiment_exists = \
                    ea.check_experiment_records_id(
                        experiment_igf_id=experiment_igf_id)
                if not experiment_exists:
                    raise ValueError('Experiment {0} not present in database'.\
                                     format(experiment_igf_id))
                (project_igf_id, sample_igf_id) = \
                    ea.fetch_project_and_sample_for_experiment(
                        experiment_igf_id=experiment_igf_id)          # fetch project and sample id for experiment
            elif self.collection_table == 'run':
                ra = RunAdaptor(**{'session': base.session})
                run_igf_id = self.collection_name
                run_exists = \
                    ra.check_run_records_igf_id(
                        run_igf_id=run_igf_id)
                if not run_exists:
                    raise ValueError('Run {0} not found in database'.\
                                     format(run_igf_id))
                (project_igf_id, sample_igf_id, experiment_igf_id) = \
                    ra.fetch_project_sample_and_experiment_for_run(
                        run_igf_id=run_igf_id)                        # fetch project, sample and experiment id for run
            elif self.collection_table == 'project':
                pa = ProjectAdaptor(**{'session': base.session})
                project_igf_id = self.collection_name
                project_exists = \
                    pa.check_project_records_igf_id(
                        project_igf_id=project_igf_id)
                if not project_exists:
                    raise ValueError('Project {0} not found in database'.\
                                     format(project_igf_id))

        if self.rename_file and self.analysis_name is None:
            raise ValueError('Analysis name is required for renaming file')  # check analysis name

        # ids forming the directory levels for each collection table and the
        # error raised if any of them is missing; same for every input file
        path_layout = {
            'project': (
                [project_igf_id],
                'Missing project id for collection {0}'),
            'sample': (
                [project_igf_id, sample_igf_id],
                'Missing project and sample id for collection {0}'),
            'experiment': (
                [project_igf_id, sample_igf_id, experiment_igf_id],
                'Missing project,sample and experiment id for collection {0}'),
            'run': (
                [project_igf_id, sample_igf_id, experiment_igf_id, run_igf_id],
                'Missing project,sample,experiment and run id for collection {0}'),
        }

        for input_file in input_file_list:
            if self.base_path is None:                                # do not move file if base_path is absent
                final_path = os.path.dirname(input_file)
            else:                                                     # move file path
                id_levels, error_template = path_layout[self.collection_table]
                if any(igf_id is None for igf_id in id_levels):
                    raise ValueError(
                        error_template.format(self.collection_name))
                final_path = \
                    os.path.join(
                        self.base_path,
                        *id_levels,
                        self.analysis_name)                           # final dir for this collection table

            if self.rename_file:
                new_filename = \
                    self.get_new_file_name(
                        input_file=input_file,
                        file_suffix=file_suffix)
                final_path = \
                    os.path.join(
                        final_path,
                        new_filename)                                 # get new filepath
            else:
                final_path = \
                    os.path.join(
                        final_path,
                        os.path.basename(input_file))

            if final_path != input_file:                              # move file if its required
                final_path = \
                    preprocess_path_name(
                        input_path=final_path)                        # remove unexpected characters from file path
                move_file(
                    source_path=input_file,
                    destinationa_path=final_path,
                    force=force)                                      # move or overwrite file to destination dir

            output_path_list.append(final_path)                       # add final path to the output list
            self.create_or_update_analysis_collection(
                file_path=final_path,
                dbsession=base.session,
                withdraw_exisitng_collection=withdraw_exisitng_collection,
                remove_file=remove_file,
                autosave_db=autosave_db)                              # load new file collection in db
            if autosave_db:
                base.commit_session()                                 # save changes to db for each file

        # NOTE(review): this final commit runs even when autosave_db is False - confirm that is intended
        base.commit_session()                                         # save changes to db
        base.close_session()                                          # close db connection
        return output_path_list
    except:
        if dbconnected:
            base.rollback_session()
            base.close_session()
        raise
def run(self):
    '''
    Send a notification mail about a finished seqrun for a project

    Reads the job parameters, fetches the project users from db and keeps the
    first one flagged as data authority, renders the email template into a
    temp dir and feeds the rendered file to the sendmail executable.
    Progress and failures are posted to slack.

    :raises ValueError: If no data authority user is found for the project
    '''
    seqrun_igf_id = None                                              # pre-bind, the error handler below formats this name
    try:
        seqrun_igf_id = self.param_required('seqrun_igf_id')
        project_name = self.param_required('project_name')
        seqrun_date = self.param_required('seqrun_date')
        flowcell_id = self.param_required('flowcell_id')
        igf_session_class = self.param_required('igf_session_class')
        template_dir = self.param_required('template_dir')
        email_template_path = self.param('email_template_path')
        email_template = self.param('email_template')
        sendmail_exe = self.param('sendmail_exe')
        use_ephemeral_space = self.param('use_ephemeral_space')
        hpcUser = False                                               # default value for hpc users

        pa = ProjectAdaptor(**{'session_class': igf_session_class})
        pa.start_session()
        user_info = \
            pa.get_project_user_info(project_igf_id=project_name)     # fetch user info from db
        pa.close_session()

        user_info = user_info[user_info['data_authority'] == 'T']     # filter dataframe for data authority
        user_info = user_info.to_dict(orient='records')               # convert dataframe to list of dictionaries
        if len(user_info) == 0:
            raise ValueError('No user found for project {0}'.format(project_name))

        user_info = user_info[0]
        user_name = user_info['name']                                 # get username for irods
        login_name = user_info['username']
        user_email = user_info['email_id']
        user_category = user_info['category']
        if user_category == 'HPC_USER':
            hpcUser = True                                            # set value for hpc user
            message = \
                'loading hpc user specific settings for {0}:{1}'.\
                format(user_name,login_name)
            self.post_message_to_slack(message, reaction='pass')      # send message to slack

        email_template_path = \
            os.path.join(
                template_dir,
                email_template_path)
        template_env = \
            Environment(
                loader=FileSystemLoader(
                    searchpath=email_template_path),
                autoescape=select_autoescape(['html','xml']))         # set template env
        template_file = template_env.get_template(email_template)
        temp_work_dir = \
            get_temp_dir(use_ephemeral_space=use_ephemeral_space)     # get a temp dir
        report_output_file = \
            os.path.join(
                temp_work_dir,
                email_template)
        template_file.\
            stream(
                projectName=project_name,
                customerEmail=user_email,
                customerName=user_name,
                customerUsername=login_name,
                projectRunDate=seqrun_date,
                flowcellId=flowcell_id,
                hpcUser=hpcUser).\
            dump(report_output_file)

        sendmail_cmd = [
            sendmail_exe,
            '-t',
        ]
        # Feed the rendered mail straight to sendmail's stdin; this avoids
        # spawning a separate cat process which was never waited on.
        with open(report_output_file, 'rb') as report_handle:
            subprocess.check_call(sendmail_cmd, stdin=report_handle)

        remove_dir(temp_work_dir)
        message = \
            'finished data processing for seqrun: {0}, project: {1}, sent mail to igf'.\
            format(seqrun_igf_id, project_name)
        self.post_message_to_slack(message, reaction='pass')
    except Exception as e:
        message = \
            'seqrun: {2}, Error in {0}: {1}'.\
            format(
                self.__class__.__name__,
                e,
                seqrun_igf_id)
        self.warning(message)
        self.post_message_to_slack(message, reaction='fail')          # post msg to slack for failed jobs
        raise
def setUp(self):
    """Create the schema and load the fixture records used by the tests.

    Reads the connection settings from ``data/dbconfig.json``, creates all
    tables and then stores, in order: platforms, flowcell barcode rules,
    seqruns, one project, samples, experiments, runs, files, collections
    and the collection/file groupings.
    """
    self.dbconfig = 'data/dbconfig.json'
    db_conf = read_dbconf_json(self.dbconfig)
    db = BaseAdaptor(**db_conf)
    self.engine = db.engine
    self.dbname = db_conf['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = db.get_session_class()
    db.start_session()

    # platforms and their flowcell barcode rules
    platform_rows = [
        {"platform_igf_id": "M03291",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"},
        {"platform_igf_id": "NB501820",
         "model_name": "NEXTSEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"},
        {"platform_igf_id": "K00345",
         "model_name": "HISEQ4000",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"},
    ]
    flowcell_rule_rows = [
        {"platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 SR",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"},
        {"platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 PE",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"},
        {"platform_igf_id": "NB501820",
         "flowcell_type": "NEXTSEQ",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"},
        {"platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"},
    ]
    platform_adaptor = PlatformAdaptor(session=db.session)
    platform_adaptor.store_platform_data(data=platform_rows)
    platform_adaptor.store_flowcell_barcode_rule(data=flowcell_rule_rows)

    # sequencing runs
    seqrun_rows = [
        {'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'flowcell_id': '000000000-BRN47',
         'platform_igf_id': 'M03291',
         'flowcell': 'MISEQ'},
        {'seqrun_igf_id': '180416_NB03291_013_000000001-BRN47',
         'flowcell_id': '000000001-BRN47',
         'platform_igf_id': 'NB501820',
         'flowcell': 'NEXTSEQ'},
    ]
    seqrun_adaptor = SeqrunAdaptor(session=db.session)
    seqrun_adaptor.store_seqrun_and_attribute_data(data=seqrun_rows)

    # project
    project_rows = [{'project_igf_id': 'projectA'}]
    project_adaptor = ProjectAdaptor(session=db.session)
    project_adaptor.store_project_and_attribute_data(data=project_rows)

    # samples
    sample_rows = [
        {'sample_igf_id': 'sampleA',
         'project_igf_id': 'projectA',
         'species_name': 'HG38'},
        {'sample_igf_id': 'sampleB',
         'project_igf_id': 'projectA',
         'species_name': 'UNKNOWN'},
    ]
    sample_adaptor = SampleAdaptor(session=db.session)
    sample_adaptor.store_sample_and_attribute_data(data=sample_rows)

    # experiments
    experiment_rows = [
        {'project_igf_id': 'projectA',
         'sample_igf_id': 'sampleA',
         'experiment_igf_id': 'sampleA_MISEQ',
         'library_name': 'sampleA',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'TENX-TRANSCRIPTOME-3P',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ'},
        {'project_igf_id': 'projectA',
         'sample_igf_id': 'sampleA',
         'experiment_igf_id': 'sampleA_NEXTSEQ',
         'library_name': 'sampleA',
         'library_source': 'UNKNOWN',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'TENX-TRANSCRIPTOME-3P',
         'library_layout': 'PAIRED',
         'platform_name': 'NEXTSEQ'},
        {'project_igf_id': 'projectA',
         'sample_igf_id': 'sampleB',
         'experiment_igf_id': 'sampleB_MISEQ',
         'library_name': 'sampleB',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'TENX-TRANSCRIPTOME-3P',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ'},
    ]
    experiment_adaptor = ExperimentAdaptor(session=db.session)
    experiment_adaptor.store_project_and_attribute_data(data=experiment_rows)

    # runs
    run_rows = [
        {'experiment_igf_id': 'sampleA_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'sampleA_MISEQ_000000000-BRN47_1',
         'lane_number': '1'},
        {'experiment_igf_id': 'sampleA_NEXTSEQ',
         'seqrun_igf_id': '180416_NB03291_013_000000001-BRN47',
         'run_igf_id': 'sampleA_NEXTSEQ_000000001-BRN47_2',
         'lane_number': '2'},
        {'experiment_igf_id': 'sampleB_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'sampleB_MISEQ_HVWN7BBXX_1',
         'lane_number': '1'},
    ]
    run_adaptor = RunAdaptor(session=db.session)
    run_adaptor.store_run_and_attribute_data(data=run_rows)

    # One fastq file per run; the collection name of each file is the
    # run id at the same position in `collection_names`.
    collection_names = (
        'sampleA_MISEQ_000000000-BRN47_1',
        'sampleA_NEXTSEQ_000000001-BRN47_2',
        'sampleB_MISEQ_HVWN7BBXX_1',
    )
    fastq_paths = (
        '/path/sampleA_MISEQ_000000000-BRN47_1_R1.fastq.gz',
        '/path/sampleA_NEXTSEQ_000000001-BRN47_2_R1.fastq.gz',
        '/path/sampleB_MISEQ_HVWN7BBXX_1_R1.fastq.gz',
    )

    # files
    file_rows = [
        {'file_path': fastq_path,
         'location': 'HPC_PROJECT',
         'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
         'size': '1528121404'}
        for fastq_path in fastq_paths
    ]
    file_adaptor = FileAdaptor(session=db.session)
    file_adaptor.store_file_and_attribute_data(data=file_rows)

    # collections and their file groupings
    collection_rows = [
        {'name': collection_name,
         'type': 'demultiplexed_fastq',
         'table': 'run'}
        for collection_name in collection_names
    ]
    collection_file_rows = [
        {'name': collection_name,
         'type': 'demultiplexed_fastq',
         'file_path': fastq_path}
        for collection_name, fastq_path in zip(collection_names, fastq_paths)
    ]
    collection_adaptor = CollectionAdaptor(session=db.session)
    collection_adaptor.store_collection_and_attribute_data(data=collection_rows)
    collection_adaptor.create_collection_group(data=collection_file_rows)

    db.close_session()