def __init__(
      self, igf_id_list, table_name, pipeline_name, dbconfig_file,
      log_slack=True, log_asana=True, slack_config=None,
      asana_project_id=None, asana_config=None, clean_up=True):
  '''
  Set up a pipeline-seed modifier: validate the target table, load the db
  adaptor and (optionally) the Slack / Asana logging clients.

  :param igf_id_list: A list of igf ids to uniquely identify the entity
  :param table_name: A database table name to look for the igf id
                     available options are 'project','sample','experiment','run',
                     'file','seqrun','collection'
  :param pipeline_name: A pipeline name to change the status of the seed
  :param dbconfig_file: A file containing the database configuration
  :param log_slack: A boolean flag for toggling Slack messages, default True
  :param log_asana: A boolean flag for toggling Asana message, default True
  :param slack_config: A file containing Slack tokens, default None
  :param asana_config: A file containing Asana tokens, default None
  :param asana_project_id: A numeric Asana project id, default is None
  :param clean_up: Clean up input file once its processed, default True
  :raises ValueError: If the table name is unsupported, or a logging toggle
                      is on but its configuration is missing
  '''
  self.igf_id_list = igf_id_list
  if table_name not in ('project', 'sample', 'experiment', 'run',
                        'file', 'seqrun', 'collection'):
    raise ValueError('Table {0} not supported for pipeline seed'.\
                     format(table_name))
  self.table_name = table_name
  self.pipeline_name = pipeline_name
  self.clean_up = clean_up
  dbparams = read_dbconf_json(dbconfig_file)
  self.base_adaptor = BaseAdaptor(**dbparams)
  self.log_slack = log_slack
  self.log_asana = log_asana
  # Slack client is only built when logging is enabled AND a config exists;
  # a truthy-but-empty config silently skips client creation (original behavior)
  if log_slack and slack_config is None:
    raise ValueError('Missing slack config file')
  elif log_slack and slack_config:
    self.igf_slack = IGF_slack(slack_config)  # add slack object
  if log_asana and \
     (asana_config is None or asana_project_id is None):
    raise ValueError('Missing asana config file or asana project id')
  elif log_asana and asana_config and asana_project_id:
    self.igf_asana = IGF_asana(asana_config, asana_project_id)  # add asana object
def __init__(self, projet_info_path, dbconfig, user_account_template,
             log_slack=True, slack_config=None, check_hpc_user=False,
             hpc_user=None, hpc_address=None, ldap_server=None,
             setup_irods=True, notify_user=True,
             default_user_email='*****@*****.**',
             project_lookup_column='project_igf_id',
             user_lookup_column='email_id',
             data_authority_column='data_authority',
             sample_lookup_column='sample_igf_id',
             barcode_check_keyword='barcode_check',
             metadata_sheet_name='Project metadata',
             sendmail_exe='/usr/sbin/sendmail'):
  '''
  Configure the project registration helper: store lookup settings, build the
  db session class and (optionally) the Slack client.

  :param projet_info_path: A directory path for project info files
                           (NOTE(review): "projet" spelling kept for caller compatibility)
  :param dbconfig: A json dbconfig file
  :param user_account_template: A template file for user account activation email
  :param log_slack: Enable or disable sending message to slack, default True
  :param slack_config: A slack config json file, required if log_slack is True
  :param check_hpc_user: Guess the hpc user name, True or False, default False
  :param hpc_user: A hpc user name, default None
  :param hpc_address: A hpc host address, default None
  :param ldap_server: A ldap server address for search, default None
  :param setup_irods: Setup irods account for user, default True
  :param notify_user: Send email notification to user, default True
  :param default_user_email: Default collaborator email added to all new projects
  :param project_lookup_column: project data lookup column, default project_igf_id
  :param user_lookup_column: user data lookup column, default email_id
  :param data_authority_column: data authority column name, default data_authority
  :param sample_lookup_column: sample data lookup column, default sample_igf_id
  :param barcode_check_keyword: Project attribute name for barcode check settings,
                                default barcode_check
  :param metadata_sheet_name: Metadata sheet name, default 'Project metadata'
  :param sendmail_exe: Sendmail executable path, default /usr/sbin/sendmail
  :raises ValueError: If slack logging is on without a config, or check_hpc_user
                      is on without hpc_user, hpc_address and ldap_server
  '''
  self.projet_info_path = projet_info_path
  self.user_account_template = user_account_template
  self.project_lookup_column = project_lookup_column
  self.user_lookup_column = user_lookup_column
  self.sample_lookup_column = sample_lookup_column
  self.data_authority_column = data_authority_column
  self.log_slack = log_slack
  dbparams = read_dbconf_json(dbconfig)
  base = BaseAdaptor(**dbparams)
  self.session_class = base.get_session_class()
  self.setup_irods = setup_irods
  self.notify_user = notify_user
  self.default_user_email = default_user_email
  self.barcode_check_keyword = barcode_check_keyword
  self.check_hpc_user = check_hpc_user
  self.hpc_user = hpc_user
  self.hpc_address = hpc_address
  self.ldap_server = ldap_server
  self.metadata_sheet_name = metadata_sheet_name
  self.sendmail_exe = sendmail_exe
  if log_slack and slack_config is None:
    raise ValueError('Missing slack config file')
  elif log_slack and slack_config:
    self.igf_slack = IGF_slack(slack_config=slack_config)
  # all three remote-host settings are needed to probe hpc accounts
  if check_hpc_user and (hpc_user is None or \
                         hpc_address is None or \
                         ldap_server is None):
    raise ValueError('Hpc user {0} address {1}, and ldap server {2} are required for check_hpc_user'.\
                     format(hpc_user,hpc_address,ldap_server))
def __init__(self, seqrun_path, seqrun_igf_list, dbconfig_file,
             clean_up=True, json_collection_type='ILLUMINA_BCL_MD5',
             log_slack=True, log_asana=True, slack_config=None,
             asana_project_id=None, asana_config=None,
             samplesheet_name='SampleSheet.csv'):
  '''
  Set up the seqrun reset helper: record run locations, load the db adaptor
  and (optionally) the Slack / Asana logging clients.

  :param seqrun_path: A directory path for sequencing run home
  :param seqrun_igf_list: A file path listing sequencing runs to reset
  :param dbconfig_file: A file containing the database configuration
  :param clean_up: Clean up input file once its processed, default True
  :param json_collection_type: A collection type for md5 json file lookup,
                               default ILLUMINA_BCL_MD5
  :param log_slack: A boolean flag for toggling Slack messages, default True
  :param log_asana: A boolean flag for toggling Asana message, default True
  :param slack_config: A file containing Slack tokens, default None
  :param asana_config: A file containing Asana tokens, default None
  :param asana_project_id: A numeric Asana project id, default is None
  :param samplesheet_name: Name of the samplesheet file, default SampleSheet.csv
  :raises ValueError: If a logging toggle is on but its configuration is missing
  '''
  self.seqrun_path = seqrun_path
  self.seqrun_igf_list = seqrun_igf_list
  self.json_collection_type = json_collection_type
  self.log_slack = log_slack
  self.log_asana = log_asana
  self.clean_up = clean_up
  self.samplesheet_name = samplesheet_name
  dbparams = read_dbconf_json(dbconfig_file)
  self.base_adaptor = BaseAdaptor(**dbparams)
  if log_slack and slack_config is None:
    raise ValueError('Missing slack config file')
  elif log_slack and slack_config:
    self.igf_slack = IGF_slack(slack_config)  # add slack object
  if log_asana and \
     (asana_config is None or asana_project_id is None):
    raise ValueError('Missing asana config file or asana project id')
  elif log_asana and asana_config and asana_project_id:
    self.igf_asana = IGF_asana(asana_config, asana_project_id)  # add asana object
def __init__(self, dbconfig_file, log_slack=True, slack_config=None):
  '''
  Load the database adaptor and, when requested, a Slack logging client.

  :param dbconfig_file: A database configuration file path
  :param log_slack: A boolean flag for toggling Slack messages, default True
  :param slack_config: A file containing Slack tokens, default None
  :raises ValueError: If log_slack is True but slack_config is None
  '''
  try:
    db_params = read_dbconf_json(dbconfig_file)
    self.base_adaptor = BaseAdaptor(**db_params)
    self.log_slack = log_slack
    if log_slack and slack_config is None:
      raise ValueError('Missing slack config file')
    elif log_slack and slack_config:
      self.igf_slack = IGF_slack(slack_config)  # Slack client for run logs
  except:
    raise
def fetch_input(self):
  '''
  Fetch input method for base runnable

  :param dbconfig: A database configuration json file
  :param log_slack: A toggle for writing logs to slack
  :param log_asana: A toggle for writing logs to asana
  '''
  try:
    db_conf_file = self.param_required('dbconfig')
    adaptor = BaseAdaptor(**read_dbconf_json(db_conf_file))
    self.param('igf_session_class',
               adaptor.get_session_class())  # set session class for pipeline
    if self.param('log_slack'):
      slack_conf_file = self.param_required('slack_config')
      self.param('igf_slack',
                 IGF_slack(slack_config=slack_conf_file))  # attach slack client
  except:
    raise
from igf_data.utils.dbutils import clean_and_rebuild_database

if __name__ == '__main__':
  # CLI entry point: wipe and re-create the database tables, reporting the
  # outcome to Slack. Argument parsing and Slack client construction are kept
  # inside the __main__ guard so importing this module has no side effects
  # (previously they ran at import time).
  parser = argparse.ArgumentParser()
  parser.add_argument('-d', '--dbconfig_path', required=True,
                      help='Database configuration json file')
  parser.add_argument('-s', '--slack_config', required=True,
                      help='Slack configuration json file')
  args = parser.parse_args()
  dbconfig_path = args.dbconfig_path
  slack_config = args.slack_config
  slack_obj = IGF_slack(slack_config=slack_config)
  try:
    clean_and_rebuild_database(dbconfig=dbconfig_path)
    slack_obj.post_message_to_channel(
      message='All old data removed from database and new tables are created',
      reaction='pass')
  except Exception as e:
    message = 'Failed to remove old data and create new tables, error: {0}'.format(e)
    slack_obj.post_message_to_channel(message, reaction='fail')
    raise ValueError(message) from e  # keep the original exception chained
class Modify_pipeline_seed:
  '''
  A class for changing pipeline run status in the pipeline_seed table
  '''
  def __init__(
        self, igf_id_list, table_name, pipeline_name, dbconfig_file,
        log_slack=True, log_asana=True, slack_config=None,
        asana_project_id=None, asana_config=None, clean_up=True):
    '''
    :param igf_id_list: A list of igf ids to uniquely identify the entity
    :param table_name: A database table name to look for the igf id
                       available options are 'project','sample','experiment','run',
                       'file','seqrun','collection'
    :param pipeline_name: A pipeline name to change the status of the seed
    :param dbconfig_file: A file containing the database configuration
    :param log_slack: A boolean flag for toggling Slack messages, default True
    :param log_asana: A boolean flag for toggling Asana message, default True
    :param slack_config: A file containing Slack tokens, default None
    :param asana_config: A file containing Asana tokens, default None
    :param asana_project_id: A numeric Asana project id, default is None
    :param clean_up: Clean up input file once its processed, default True
    :raises ValueError: If the table name is unsupported or a logging toggle
                        is on without its configuration
    '''
    self.igf_id_list = igf_id_list
    if table_name not in ('project', 'sample', 'experiment', 'run',
                          'file', 'seqrun', 'collection'):
      raise ValueError('Table {0} not supported for pipeline seed'.\
                       format(table_name))
    self.table_name = table_name
    self.pipeline_name = pipeline_name
    self.clean_up = clean_up
    dbparams = read_dbconf_json(dbconfig_file)
    self.base_adaptor = BaseAdaptor(**dbparams)
    self.log_slack = log_slack
    self.log_asana = log_asana
    if log_slack and slack_config is None:
      raise ValueError('Missing slack config file')
    elif log_slack and slack_config:
      self.igf_slack = IGF_slack(slack_config)  # add slack object
    if log_asana and \
       (asana_config is None or asana_project_id is None):
      raise ValueError('Missing asana config file or asana project id')
    elif log_asana and asana_config and asana_project_id:
      self.igf_asana = IGF_asana(asana_config, asana_project_id)  # add asana object

  def _fetch_pipeline_seed_entry(self, igf_id, select_seed_status=None,
                                 restrict_seed_status=None):
    '''
    An internal method for fetching unique pipeline seed entry from database

    :param igf_id: A igf id to uniquely select pipe seed data
    :param select_seed_status: A list of seed status to include from the query,
                               default None
    :param restrict_seed_status: A list of seed status to exclude from the query,
                                 default None
    :returns: A unique Pipeline_seed record or None
    :raises ValueError: If the configured table is not 'seqrun'
    '''
    if self.table_name == 'seqrun':
      query = \
        self.base_adaptor.session.\
          query(Pipeline_seed).\
          join(Seqrun, Pipeline_seed.seed_id == Seqrun.seqrun_id).\
          join(Pipeline).\
          filter(Seqrun.seqrun_igf_id == igf_id).\
          filter(Pipeline_seed.seed_table == self.table_name).\
          filter(Pipeline.pipeline_id == Pipeline_seed.pipeline_id).\
          filter(Pipeline.pipeline_name == self.pipeline_name)  # get base query for seqrun table
    else:
      # BUGFIX: previously referenced the non-existent attribute self.table,
      # which raised AttributeError instead of the intended ValueError
      raise ValueError('Table {0} not supported for pipeline status reset'.\
                       format(self.table_name))
    if select_seed_status is not None and \
       isinstance(select_seed_status, list) and \
       len(select_seed_status) > 0:
      query = query.filter(
        Pipeline_seed.status.in_(select_seed_status))  # add generic select filter
    if restrict_seed_status is not None and \
       isinstance(restrict_seed_status, list) and \
       len(restrict_seed_status) > 0:
      query = query.filter(
        not_(Pipeline_seed.status.in_(restrict_seed_status)))  # add generic restrict filter
    pipeseed_data = self.base_adaptor.fetch_records(
      query, output_mode='one_or_none')  # fetch unique value for pipeline seed
    return pipeseed_data

  def reset_pipeline_seed_for_rerun(self, seeded_label='SEEDED',
                                    restricted_status_list=('SEEDED', 'RUNNING')):
    '''
    A method for setting the pipeline for re-run if the first run has failed
    or aborted. This method will set the pipeline_seed.status as 'SEEDED' only
    if its not already 'SEEDED' or 'RUNNING'

    :param seeded_label: A text label for seeded status, default SEEDED
    :param restricted_status_list: A list of pipeline status to exclude from
                                   the search, default ('SEEDED','RUNNING')
    '''
    try:
      db_connected = False
      restricted_status_list = list(restricted_status_list)
      input_id_list = self._read_input_list(
        igf_id_list=self.igf_id_list)  # get input ids from file
      failed_ids = list()  # ids with no eligible seed entry
      pass_list = list()  # required for logging in asana
      base = self.base_adaptor
      base.start_session()  # connect to database
      db_connected = True
      for igf_id in input_id_list:
        pipe_seed_data = self._fetch_pipeline_seed_entry(
          igf_id=igf_id,
          restrict_seed_status=restricted_status_list)  # get pipe seed data for igf id
        if pipe_seed_data is None:
          failed_ids.append(igf_id)  # add igf id to failed list
        else:
          pl = PipelineAdaptor(**{'session': base.session})  # connect to pipeline adaptor
          updated_seed_data = [{
            'pipeline_id': pipe_seed_data.pipeline_id,
            'seed_id': pipe_seed_data.seed_id,
            'seed_table': pipe_seed_data.seed_table,
            'status': seeded_label}]  # set data for seed update
          pl.update_pipeline_seed(
            data=updated_seed_data,
            autosave=False)  # update data to pipeline seed table
          pass_list.append(igf_id)
      base.commit_session()  # save data to database after all changes
      base.close_session()  # close database connection
      db_connected = False
      if self.clean_up:
        self._clear_input_list(
          file_path=self.igf_id_list,
          igf_list=failed_ids)  # over write input list and add failed ids for next try
        message = 'Overwriting pipeseed input list {0}'.format(self.igf_id_list)
        if self.log_slack:
          self.igf_slack.post_message_to_channel(
            message, reaction='pass')  # comment to slack for file over writing
      if len(pass_list) > 0:
        for id_line in pass_list:
          message = 'Changed pipeline seed for id {0}, pipeline {1}, to {2}'.\
                    format(id_line, self.pipeline_name, seeded_label)
          if self.log_slack:
            self.igf_slack.post_message_to_channel(
              message, reaction='pass')  # comment to slack channel
          if self.log_asana:
            self.igf_asana.comment_asana_task(
              task_name=id_line, comment=message)  # comment on asana task
    except Exception as e:
      if db_connected:
        base.rollback_session()  # undo partial updates on failure
        base.close_session()
      message = 'Failed to update pipeline seed, Error: {0}'.format(e)
      warnings.warn(message)
      if self.log_slack:
        self.igf_slack.post_message_to_channel(message, reaction='fail')
      raise

  @staticmethod
  def _clear_input_list(file_path, igf_list):
    '''
    A static method for clearing the input list file

    :param file_path: Path of the id list file to overwrite
    :param igf_list: A list of ids to write back to the file
    :raises IOError: If the file does not exist
    '''
    if not os.path.exists(file_path):
      raise IOError('File {0} not found'.format(file_path))
    with open(file_path, 'w') as fwp:
      fwp.write('\n'.join(igf_list))  # over write input list file

  @staticmethod
  def _read_input_list(igf_id_list):
    '''
    A static method for reading list of ids from an input file to a list

    :param igf_id_list: A file containing the input igf ids
    :return list: A list of ids from the input file
    :raises IOError: If the file does not exist
    '''
    if not os.path.exists(igf_id_list):
      # BUGFIX: error message previously referenced the undefined name
      # seqrun_igf_list, raising NameError instead of the intended IOError
      raise IOError('File {0} not found'.format(igf_id_list))
    with open(igf_id_list, 'r') as fp:
      id_list = [i.strip() for i in fp]  # add ids to the list
    return id_list
def __init__(self, slack_config_json, project_data_file):
  '''
  Store the project data file path and build a Slack client.

  :param slack_config_json: A Slack configuration json file
  :param project_data_file: A project data file path
  '''
  self.igf_slack = IGF_slack(
    slack_config=slack_config_json)  # create slack client instance
  self.project_data_file = project_data_file
class Find_and_register_new_project_data:
  '''
  A class for finding new data for project and registering them to the db.
  Account for new users will be created in irods server and password will be
  mailed to them.

  :param projet_info_path: A directory path for project info files
  :param dbconfig: A json dbconfig file
  :param check_hpc_user: Guess the hpc user name, True or False, default: False
  :param hpc_user: A hpc user name, default is None
  :param hpc_address: A hpc host address, default is None
  :param ldap_server: A ldap server address for search, default is None
  :param user_account_template: A template file for user account activation email
  :param log_slack: Enable or disable sending message to slack, default: True
  :param slack_config: A slack config json file, required if log_slack is True
  :param project_lookup_column: project data lookup column, default project_igf_id
  :param user_lookup_column: user data lookup column, default email_id
  :param sample_lookup_column: sample data lookup column, default sample_igf_id
  :param data_authority_column: data authority column name, default data_authority
  :param setup_irods: Setup irods account for user, default is True
  :param notify_user: Send email notification to user, default is True
  :param default_user_email: Add another user as the default collaborator for all new projects, default [email protected]
  :param barcode_check_keyword: Project attribute name for barcode check settings, default barcode_check
  :param sendmail_exe: Sendmail executable path, default /usr/sbin/sendmail
  '''
  # NOTE(review): "projet_info_path" looks like a typo of "project_info_path",
  # but renaming the parameter/attribute would break existing callers -- confirm
  def __init__(self, projet_info_path, dbconfig, user_account_template,
               log_slack=True, slack_config=None, check_hpc_user=False,
               hpc_user=None, hpc_address=None, ldap_server=None,
               setup_irods=True, notify_user=True,
               default_user_email='*****@*****.**',
               project_lookup_column='project_igf_id',
               user_lookup_column='email_id',
               data_authority_column='data_authority',
               sample_lookup_column='sample_igf_id',
               barcode_check_keyword='barcode_check',
               metadata_sheet_name='Project metadata',
               sendmail_exe='/usr/sbin/sendmail'):
    try:
      self.projet_info_path = projet_info_path
      self.user_account_template = user_account_template
      self.project_lookup_column = project_lookup_column
      self.user_lookup_column = user_lookup_column
      self.sample_lookup_column = sample_lookup_column
      self.data_authority_column = data_authority_column
      self.log_slack = log_slack
      dbparams = read_dbconf_json(dbconfig)
      base = BaseAdaptor(**dbparams)
      self.session_class = base.get_session_class()  # session factory shared by all methods
      self.setup_irods = setup_irods
      self.notify_user = notify_user
      self.default_user_email = default_user_email
      self.barcode_check_keyword = barcode_check_keyword
      self.check_hpc_user = check_hpc_user
      self.hpc_user = hpc_user
      self.hpc_address = hpc_address
      self.ldap_server = ldap_server
      self.metadata_sheet_name = metadata_sheet_name
      self.sendmail_exe = sendmail_exe
      if log_slack and slack_config is None:
        raise ValueError('Missing slack config file')
      elif log_slack and slack_config:
        self.igf_slack = IGF_slack(slack_config=slack_config)
      # all three remote settings are required for the ldap account probe
      if check_hpc_user and (hpc_user is None or \
                             hpc_address is None or \
                             ldap_server is None):
        raise ValueError('Hpc user {0} address {1}, and ldap server {2} are required for check_hpc_user'.\
                         format(hpc_user,hpc_address,ldap_server))
    except:
      raise

  def process_project_data_and_account(self):
    '''
    A method for finding new project info and registering them to database
    and user account creation
    '''
    try:
      new_project_info_list = self._find_new_project_info()
      if len(new_project_info_list) == 0:
        if self.log_slack:
          self.igf_slack.post_message_to_channel(message='No project info found',\
                                                 reaction='sleep')
      for project_info_file in new_project_info_list:
        try:
          new_data = self._read_project_info_and_get_new_entries(
            project_info_file)  # get new project, user and samples information
          self._check_and_register_data(data=new_data,\
                                        project_info_file=project_info_file)  # register data
          if self.log_slack:
            message='loaded new metadata from file {0}'.\
                    format(os.path.basename(project_info_file))
            self.igf_slack.post_message_to_channel(message, reaction='pass')
        except Exception as e:  # if error found in one file, skip the file
          message='skipped project info file {0}, got error {1}'.\
                  format(project_info_file,e)
          warnings.warn(message)
          if self.log_slack:
            self.igf_slack.post_message_to_channel(
              message, reaction='fail')  # send message to slack
    except Exception as e:
      if self.log_slack:
        message = 'Error in registering project info: {0}'.format(e)
        self.igf_slack.post_message_to_channel(message, reaction='fail')
      raise

  def _check_existing_data(self, data, dbsession, table_name, check_column='EXISTS'):
    '''
    An internal function for checking and registering project info

    :param data: A pandas data series
    :param dbsession: A sqlalchemy database session object
    :param table_name: A database table name
    :param check_column: Column name for existing data
    '''
    try:
      if not isinstance(data, pd.Series):
        raise ValueError('Expecting a data series and got {0}'.format(
          type(data)))
      if table_name == 'project':
        if self.project_lookup_column in data and \
           not pd.isnull(data[self.project_lookup_column]):
          project_igf_id = data[self.project_lookup_column]
          pa = ProjectAdaptor(**{'session': dbsession})  # connect to project adaptor
          project_exists = pa.check_project_records_igf_id(project_igf_id)
          if project_exists:  # store data only if project is not existing
            data[check_column] = True
          else:
            data[check_column] = False
          return data
        else:
          raise ValueError('Missing or empty required column {0}'.\
                           format(self.project_lookup_column))
      elif table_name == 'user':
        if self.user_lookup_column in data and \
           not pd.isnull(data[self.user_lookup_column]):
          user_email = data[self.user_lookup_column]
          ua = UserAdaptor(**{'session': dbsession})  # connect to user adaptor
          user_exists = ua.check_user_records_email_id(email_id=user_email)
          if user_exists:  # store data only if user is not existing
            data[check_column] = True
          else:
            data[check_column] = False
          return data
        else:
          raise ValueError('Missing or empty required column {0}'.\
                           format(self.user_lookup_column))
      elif table_name == 'sample':
        if self.sample_lookup_column in data and \
           not pd.isnull(data[self.sample_lookup_column]):
          project_igf_id = data[self.project_lookup_column]
          sample_igf_id = data[self.sample_lookup_column]
          sa = SampleAdaptor(**{'session': dbsession})  # connect to sample adaptor
          sample_project_exists=sa.check_project_and_sample(project_igf_id=project_igf_id,\
                                                            sample_igf_id=sample_igf_id)  # check for existing sample_id and project-id combination
          if sample_project_exists:  # store data only if sample is not existing
            data[check_column] = True
          else:
            sample_exists = sa.check_sample_records_igf_id(
              sample_igf_id)  # check for existing sample
            if sample_exists:
              raise ValueError('Sample {0} exists in database but not associated with project {1}'.\
                               format(sample_igf_id,project_igf_id))  # inconsistency in sample project combination
            data[check_column] = False
          return data
        else:
          raise ValueError('Missing or empty required column {0}'.\
                           format(self.sample_lookup_column))
      elif table_name == 'project_user':
        if self.user_lookup_column in data and \
           not pd.isnull(data[self.user_lookup_column]) and \
           self.project_lookup_column in data and \
           not pd.isnull(data[self.project_lookup_column]):
          project_igf_id = data[self.project_lookup_column]
          user_email = data[self.user_lookup_column]
          pa = ProjectAdaptor(**{'session': dbsession})  # connect to project adaptor
          project_user_exists=pa.check_existing_project_user(project_igf_id,\
                                                             email_id=user_email)
          if user_email != self.default_user_email and \
             (self.data_authority_column not in data or \
              pd.isnull(data[self.data_authority_column])):
            data[
              self.
              data_authority_column] = True  # set user as data authority, filter default user
          if project_user_exists:  # store data only if sample is not existing
            data[check_column] = True
          else:
            data[check_column] = False
          return data
        else:
          raise ValueError('Missing or empty required column {0}, {1}'.\
                           format(self.project_lookup_column,\
                                  self.user_lookup_column))
      else:
        raise ValueError('table {0} not supported'.format(table_name))
    except:
      raise

  def _notify_about_new_user_account(self,data,user_col='username',\
                                     password_col='password',hpc_user_col='hpc_username',\
                                     name_col='name',email_id_col='email_id'):
    '''
    An internal method for sending mail to new user with their password

    :param data: A pandas series containing user data
    :param user_col: Column name for username, default username
    :param password_col: Column name for password, default password
    :param hpc_user_col: Column name for hpc_username, default hpc_username
    :param name_col: Column name for name, default name
    :param email_id_col: Column name for email id, default email_id
    '''
    try:
      if not isinstance(data, pd.Series):
        raise ValueError('Expecting a pandas series and got {0}'.\
                         format(type(data)))
      username = data[user_col]
      fullname = data[name_col]
      password = data[password_col]
      email_id = data[email_id_col]
      if hpc_user_col not in data or pd.isnull(
          data[hpc_user_col]):  # send email only to non-hpc users
        template_dir = os.path.dirname(self.user_account_template)
        template_env=Environment(loader=FileSystemLoader(searchpath=template_dir), \
                                 autoescape=select_autoescape(['html','xml']))  # set template env
        template_file=template_env.\
                      get_template(os.path.basename(self.user_account_template))
        temp_work_dir = get_temp_dir()  # get a temp dir
        report_output_file = os.path.join(temp_work_dir, 'email_template.txt')
        template_file.\
          stream(userEmail=email_id, \
                 fullName=fullname,\
                 userName=username,\
                 userPass=password,\
                 ).\
          dump(report_output_file)  # render the email body into the temp file
        read_cmd = ['cat', quote(report_output_file)]
        proc = subprocess.Popen(read_cmd,
                                stdout=subprocess.PIPE)
        sendmail_cmd = [self.sendmail_exe, '-t']
        subprocess.check_call(sendmail_cmd, stdin=proc.stdout)  # pipe rendered email into sendmail
        proc.stdout.close()
        # NOTE(review): Popen.returncode stays None until wait()/poll() is
        # called, so this check is likely never triggered -- confirm intent
        if proc.returncode != None:
          raise ValueError('Failed running command {0}:{1}'.format(read_cmd,\
                                                                   proc.returncode))
        remove_dir(temp_work_dir)
    except:
      raise

  @staticmethod
  def _get_user_password(password_length=12):
    '''
    An internal staticmethod for generating random password

    :param password_length: Required length of password, default 12
    '''
    try:
      new_password = None  # default value of the new password is None
      symbols = '^!'  # allowed symbols in password
      chars=string.ascii_lowercase+\
            string.ascii_uppercase+\
            string.digits+\
            symbols  # a string of lower case and upper case letters, digits and symbols
      # NOTE(review): embedding string.punctuation inside a character class is
      # fragile (contains ']' and '\\'); consider re.escape -- confirm behavior
      symbol_pattern = re.compile(r'^[{0}]'.format(string.punctuation))
      digit_pattern = re.compile(r'^[0-9]+')
      while new_password is None or \
            re.match(symbol_pattern,new_password) or \
            re.match(digit_pattern,new_password):  # password can't be None or starts with digit or a symbol
        new_password=''.join([chars[ord(os.urandom(1)) % len(chars)] \
                              for i in range(password_length)])  # assign a new random password
      return new_password
    except:
      raise

  def _setup_irods_account(self,data,user_col='username',\
                           password_col='password',\
                           hpc_user_col='hpc_username',\
                           ):
    '''
    An internal method for creating new user account in irods

    :param data: A pandas series containing user data
    :param user_col: Column name for username, default username
    :param password_col: Column name for password, default password
    :param hpc_user_col: Column name for hpc_username, default hpc_username
    '''
    try:
      if not isinstance(data, pd.Series):
        raise ValueError('Expecting a pandas series and got {0}'.\
                         format(type(data)))
      if user_col not in data or pd.isnull(data[user_col]):
        raise ValueError('Missing required username')
      if (hpc_user_col not in data or pd.isnull(data[hpc_user_col])) and \
         (password_col not in data or pd.isnull(data[password_col])):
        raise ValueError('Missing required field password for non-hpc user {0}'.\
                         format(data[user_col]))
      username = data[user_col]
      hpc_username = data[hpc_user_col]
      password = data[password_col]
      check_cmd1 = ['iadmin', 'lu']
      check_cmd2 = ['grep', '-w', quote(username)]
      c_proc1 = subprocess.Popen(check_cmd1, stdout=subprocess.PIPE)
      c_proc2 = subprocess.Popen(check_cmd2, stdin=c_proc1.stdout,
                                 stdout=subprocess.PIPE)
      c_proc1.stdout.close()
      # NOTE(review): returncode is None before wait()/poll(); this check is
      # likely never triggered -- confirm intent
      if c_proc1.returncode != None:
        raise ValueError('Failed running command {0}:{1}'.format(check_cmd1,\
                                                                 c_proc1.returncode))
      result = c_proc2.communicate()[0]
      result = result.decode('UTF-8')
      if result != '' and pd.isnull(
          data[hpc_user_col]):  # for non hpc users
        if self.check_hpc_user:
          raise ValueError('Can not reset iRODS password for non hpc user {0} with check_hpc_user option'.\
                           format(username))
        else:
          # NOTE(review): "is not None or != ''" is always True; probably
          # "and" was intended -- confirm before changing
          if password is not None or password != '':
            irods_passwd_cmd='{0} {1} {2}#{3} {4} {5}'.\
                             format('iadmin', 'moduser', quote(username), 'igfZone',
                                    'password', quote(password))  # format irods command for shell
            subprocess.check_call(irods_passwd_cmd, shell=True)
            if self.log_slack:
              message='resetting irods account password for non-hpc user: {0}, password length: {1}'.\
                      format(username,len(password))
              self.igf_slack.post_message_to_channel(
                message, reaction='pass')
          else:
            raise ValueError('Missing password for non-hpc user {0}'.\
                             format(quote(username)))
      elif result == '':
        irods_mkuser_cmd=['iadmin', 'mkuser', \
                          '{0}#igfZone'.format(quote(username)), 'rodsuser']
        subprocess.check_call(irods_mkuser_cmd)  # create irods user
        irods_chmod_cmd=['ichmod', '-M', 'own', 'igf', \
                         '/igfZone/home/{0}'.format(quote(username))]
        subprocess.check_call(
          irods_chmod_cmd)  # change permission for irods user
        irods_inherit_cmd=['ichmod','-r', 'inherit', \
                           '/igfZone/home/{0}'.format(quote(username))]
        subprocess.check_call(irods_inherit_cmd)  # inherit irods user
        # NOTE(review): same always-true "or" condition as above -- confirm
        if (hpc_username is None or hpc_username == '' ) and \
           (password is not None or password != ''):
          if len(password) > 20:
            raise ValueError('check password for non hpc user {0}: {1}'.\
                             format(username,password))  # it could be the encrypted password
          irods_passwd_cmd='{0} {1} {2}#{3} {4} {5}'.\
                           format('iadmin', 'moduser', quote(username), 'igfZone',
                                  'password', quote(password))  # format irods command for shell
          subprocess.check_call(
            irods_passwd_cmd, shell=True)  # set password for non-hpc user
          if self.log_slack:
            message='created irods account for non-hpc user: {0}'.\
                    format(username)
            self.igf_slack.post_message_to_channel(message, reaction='pass')
    except:
      raise

  def _get_hpc_username(self, username):
    '''
    An internal method for checking hpc accounts for new users
    This method is not reliable as the ldap server can be down from time to time

    :param username: A username string
    '''
    try:
      cmd1=['ssh', \
            '{0}@{1}'.format(quote(self.hpc_user),quote(self.hpc_address)), \
            'ldapsearch -x -h {0}'.format(quote(self.ldap_server)), \
           ]
      cmd2=['grep',\
            '-w',\
            'uid: {0}'.format(quote(username)), \
           ]
      proc1 = subprocess.Popen(cmd1, stdout=subprocess.PIPE)
      proc2 = subprocess.Popen(cmd2, stdin=proc1.stdout,
                               stdout=subprocess.PIPE)
      proc1.stdout.close()
      # NOTE(review): returncode is None before wait()/poll() -- confirm intent
      if proc1.returncode != None:
        raise ValueError('Failed running command {0}:{1}'.format(cmd1,\
                                                                 proc1.returncode))
      result = proc2.communicate()[0]
      result = result.decode('UTF-8')
      if result == '':
        hpc_username = None  # no ldap match found for this username
      else:
        hpc_username = username
      return hpc_username
    except:
      raise

  def _assign_username_and_password(self,data,user_col='username',\
                                    hpc_user_col='hpc_username',\
                                    password_col='password',\
                                    email_col='email_id',
                                    hpc_category='HPC_USER',
                                    category_col='category'):
    '''
    An internal method for assigning new user account and password

    :param data: A pandas series containing user data
    :param user_col: Column name for username, default username
    :param password_col: Column name for password, default password
    :param hpc_user_col: Column name for hpc_username, default hpc_username
    :param email_col: Column name for email id, default email_id
    :param category_col: Column name for user category, default category
    :param hpc_category: Category tag for hpc user, default: HPC_USER
    '''
    try:
      if not isinstance(data, pd.Series):
        raise ValueError('Expecting a pandas series and got {0}'.\
                         format(type(data)))
      if (user_col not in data or pd.isnull(data[user_col])) and \
         (hpc_user_col in data and not pd.isnull(data[hpc_user_col])):  # if hpc username found, make it username
        data[user_col] = data[hpc_user_col]
      if (user_col not in data or (user_col in data and pd.isnull(
          data[user_col]))):  # assign username from email id
        username, _ = data[email_col].split(
          '@', 1)  # get username from email id
        data[user_col]=username[:10] if len(username)>10 \
                       else username  # allowing only first 10 chars of the email id
      if (hpc_user_col not in data or pd.isnull(data[hpc_user_col])) and \
         self.check_hpc_user:  # assign hpc username
        hpc_username = self._get_hpc_username(username=data[user_col])
        data[hpc_user_col] = hpc_username  # set hpc username
      if user_col in data and not pd.isnull(data[user_col]) and \
         hpc_user_col in data and not pd.isnull(data[hpc_user_col]) and \
         data[user_col] != data[hpc_user_col]:  # if user name and hpc username both are present, they should be same
        raise ValueError('username {0} and hpc_username {1} should be same'.\
                         format(data[user_col],data[hpc_user_col]))
      if (hpc_user_col not in data or pd.isnull(data[hpc_user_col])) and \
         (password_col not in data or pd.isnull(data[password_col])):
        data[password_col] = self._get_user_password(
          )  # assign a random password if its not supplied
      if (category_col not in data or pd.isnull(data[category_col])) and \
         (hpc_user_col in data and not pd.isnull(
           data[hpc_user_col])):  # set user category for hpc users
        data[category_col] = hpc_category
      return data
    except:
      raise

  def _add_default_user_to_project(self, project_user_data):
    '''
    An internal method for adding default user to the project_user_data dataframe

    :param project_user_data: A dataframe containing project_igf_id and email_id column
    :returns: a pandas dataframe with new row for the project_igf_id and default_user_email
    '''
    try:
      new_project_user_data = list()
      for row in project_user_data.to_dict(orient='records'):
        new_project_user_data.append(row)
        row2 = deepcopy(row)  # duplicate the row for the default collaborator
        row2[self.user_lookup_column] = self.default_user_email
        new_project_user_data.append(row2)
      new_project_user_data = pd.DataFrame(new_project_user_data)
      return new_project_user_data
    except:
      raise

  def _check_and_register_data(self, data, project_info_file):
    '''
    An internal method for checking and registering data

    :param data: A dictionary containing following keys
          project_data
          user_data
          project_user_data
          sample_data
    :param project_info_file: A filepath for project info
    '''
    try:
      db_connected = False
      project_data = pd.DataFrame(data['project_data'])
      user_data = pd.DataFrame(data['user_data'])
      project_user_data = pd.DataFrame(data['project_user_data'])
      sample_data = pd.DataFrame(data['sample_data'])
      base = BaseAdaptor(**{'session_class': self.session_class})
      base.start_session()  # connect_to db
      db_connected = True
      project_data = project_data[project_data[
        self.project_lookup_column].isnull() == False]  # drop rows with empty lookup id
      project_data = project_data.drop_duplicates()
      if project_data.index.size > 0:
        project_data=project_data.\
          apply(lambda x: \
                self._check_existing_data(\
                  data=x,\
                  dbsession=base.session, \
                  table_name='project',
                  check_column='EXISTS'),\
                axis=1)  # get project map
        project_data = project_data[project_data['EXISTS'] ==
                                    False]  # filter existing projects
        project_data.drop('EXISTS', axis=1,
                          inplace=True)  # remove extra column
      user_data = user_data[user_data[self.user_lookup_column].isnull() ==
                            False]  # drop rows with empty lookup id
      user_data = user_data.drop_duplicates()
      if user_data.index.size > 0:
        user_data=user_data.apply(lambda x: \
                                  self._assign_username_and_password(x), \
                                  axis=1)  # check for use account and password
        user_data=user_data.\
          apply(lambda x: \
                self._check_existing_data(\
                  data=x,\
                  dbsession=base.session, \
                  table_name='user',
                  check_column='EXISTS'),\
                axis=1)  # get user map
        user_data = user_data[user_data['EXISTS']
== False] # filter existing users user_data.drop('EXISTS', axis=1, inplace=True) # remove extra column sample_data = sample_data[sample_data[ self.sample_lookup_column].isnull() == False] sample_data = sample_data.drop_duplicates() if sample_data.index.size > 0: sample_data=sample_data.\ apply(lambda x: \ self._check_existing_data(\ data=x,\ dbsession=base.session, \ table_name='sample', check_column='EXISTS'),\ axis=1) # get sample map sample_data = sample_data[sample_data['EXISTS'] == False] # filter existing samples sample_data.drop('EXISTS', axis=1, inplace=True) # remove extra column project_user_data = project_user_data.drop_duplicates() project_user_data_mask=(project_user_data[self.project_lookup_column].isnull()==False) & \ (project_user_data[self.user_lookup_column].isnull()==False) project_user_data = project_user_data[ project_user_data_mask] # not allowing any empty values for project or user lookup if project_user_data.index.size > 0: project_user_data = self._add_default_user_to_project( project_user_data ) # update project_user_data with default users project_user_data=project_user_data.\ apply(lambda x: \ self._check_existing_data(\ data=x,\ dbsession=base.session, \ table_name='project_user', check_column='EXISTS'),\ axis=1) # get project user map project_user_data = project_user_data[project_user_data[ 'EXISTS'] == False] # filter existing project user project_user_data.drop('EXISTS', axis=1, inplace=True) # remove extra column if len(project_data.index) > 0: # store new projects pa1 = ProjectAdaptor(**{'session': base.session }) # connect to project adaptor pa1.store_project_and_attribute_data( data=project_data, autosave=False) # load project data if len(user_data.index) > 0: # store new users ua = UserAdaptor(**{'session': base.session}) ua.store_user_data(data=user_data, autosave=False) # load user data if len(project_user_data.index) > 0: # store new project users pa2 = ProjectAdaptor(**{'session': base.session }) # connect to project 
adaptor project_user_data = project_user_data.to_dict( orient='records') # convert dataframe to dictionary pa2.assign_user_to_project( data=project_user_data, autosave=False) # load project user data if len(sample_data.index) > 0: # store new samples sa = SampleAdaptor(**{'session': base.session }) # connect to sample adaptor sa.store_sample_and_attribute_data( data=sample_data, autosave=False) # load samples data if self.setup_irods: user_data.apply(lambda x: self._setup_irods_account(data=x), axis=1) # create irods account file_checksum = calculate_file_checksum(filepath=project_info_file) file_size = os.path.getsize(project_info_file) file_data=[{'file_path':project_info_file,\ 'location':'ORWELL',\ 'md5':file_checksum,\ 'size':file_size,\ }] fa = FileAdaptor(**{'session': base.session}) # connect to file adaptor fa.store_file_data(data=file_data, autosave=False) except: if db_connected: base.rollback_session() # rollback session raise else: if db_connected: base.commit_session() # commit changes to db if len(user_data.index) > 0 and self.notify_user: user_data.apply(lambda x: self._notify_about_new_user_account(x),\ axis=1) # send mail to new user with their password and forget it finally: if db_connected: base.close_session() # close db connection def _check_and_add_project_attributes(self, data_series): ''' An internal method for checking project data and adding required attributes :param data_series: A Pandas Series containing project data :returns: A Pandas series with project attribute information ''' try: if not isinstance(data_series, pd.Series): raise AttributeError('Expecting a Pandas Series and got {0}'.\ format(type(data_series))) if self.barcode_check_keyword not in data_series or \ pd.isnull(data_series[self.barcode_check_keyword]): data_series[ self. 
barcode_check_keyword] = 'ON' # by default barcode checking is always ON return data_series except: raise def _read_project_info_and_get_new_entries(self, project_info_file): ''' An internal method for processing project info data :param project_info_file: A filepath for project_info csv files :returns: A dictionary with following keys project_data user_data project_user_data sample_data ''' try: if fnmatch.fnmatch(project_info_file, '*.csv'): project_info_data = pd.read_csv( project_info_file) # read project info data from csv file elif fnmatch.fnmatch(project_info_file, '*xls'): xl = pd.ExcelFile(project_info_file) if self.metadata_sheet_name not in xl.sheet_names: # check for required metadata sheet name raise ValueError('Excel file does not have the sheet {0}'.\ format(self.metadata_sheet_name)) project_info_data = xl.parse( self.metadata_sheet_name ) # read xls file from the metadata sheet else: raise ValueError('No parser defined for file {0}'.\ format(project_info_file)) base = BaseAdaptor(**{'session_class': self.session_class}) required_project_columns=base.get_table_columns(table_name=Project, \ excluded_columns=['project_id']) # get project columns required_project_columns.append( self.barcode_check_keyword ) # add barcode check param to project attribute table required_user_columns=base.get_table_columns(table_name=User, \ excluded_columns=['user_id']) # get user columns required_project_user_columns = ['project_igf_id', 'email_id' ] # get project user columns project_data = project_info_data.loc[:, required_project_columns] # get data for project table user_data = project_info_data.loc[:, required_user_columns] # get data for user table project_user_data = project_info_data.loc[:, required_project_user_columns] # get data for project user table required_sample_columns=list(set(project_info_data.columns).\ difference(set(list(project_data)+\ list(user_data)+\ list(project_user_data)))) # all remaining column goes to sample tables 
required_sample_columns.append('project_igf_id') sample_data = project_info_data.loc[:, required_sample_columns] # get data for sample table project_data = project_data.drop_duplicates() project_data=project_data.apply(lambda x: \ self._check_and_add_project_attributes(x), axis=1) # add missing project attribute to the dataframe project_data['project_igf_id']=project_data['project_igf_id'].\ map(lambda x: x.replace(' ','')) # replace any white space from project igf id user_data = user_data.drop_duplicates() user_data['email_id']=user_data['email_id'].\ map(lambda x: x.replace(' ','')) # replace any white space from email id if 'name' in user_data.columns: user_data['name'].fillna('', inplace=True) user_data['name']=user_data['name'].\ map(lambda x: x.title()) # reformat name, if its present project_user_data = project_user_data.drop_duplicates() project_user_data['project_igf_id']=project_user_data['project_igf_id'].\ map(lambda x: x.replace(' ','')) # replace any white space from project igf id project_user_data['email_id']=project_user_data['email_id'].\ map(lambda x: x.replace(' ','')) # replace any white space from email id sample_data = sample_data.drop_duplicates( ) # remove duplicate entries sample_data['project_igf_id']=sample_data['project_igf_id'].\ map(lambda x: x.replace(' ','')) # replace any white space from project igf id sample_data['sample_igf_id']=sample_data['sample_igf_id'].\ map(lambda x: x.replace(' ','')) # replace any white space from sample igf id if self.project_lookup_column not in project_data.columns: raise ValueError('Missing required column: {0}'.\ format(self.project_lookup_column)) if self.user_lookup_column not in user_data.columns: raise ValueError('Missing required column: {0}'.\ format(self.user_lookup_column)) if self.sample_lookup_column not in sample_data.columns: raise ValueError('Missing required column: {0}'.\ format(self.sample_lookup_column)) # check if required columns are present in the dataframe return 
{'project_data':project_data,\ 'user_data':user_data,\ 'project_user_data':project_user_data,\ 'sample_data':sample_data} except: raise def _find_new_project_info(self): ''' An internal method for fetching new project info file It returns a list one new project info file ''' try: new_project_info_list = list() fa = FileAdaptor(**{'session_class': self.session_class}) fa.start_session() # connect to db for root_path, _, files in os.walk(self.projet_info_path, topdown=True): for file_path in files: if fnmatch.fnmatch(file_path, '*.csv') or \ fnmatch.fnmatch(file_path, '*xls'): # only consider csv or xls files file_check = fa.check_file_records_file_path( file_path=os.path.join( root_path, file_path)) # check for filepath in db if not file_check: new_project_info_list.append( os.path.join(root_path, file_path) ) # collect new project info files fa.close_session() # disconnect db return new_project_info_list except: raise
import argparse
from igf_data.task_tracking.igf_slack import IGF_slack
from igf_data.process.project_info.project_pooling_info import Project_pooling_info

parser = argparse.ArgumentParser()
parser.add_argument('-d','--dbconfig', required=True,
                    help='Database configuration file path')
parser.add_argument('-n','--slack_config', required=True,
                    help='Slack configuration file path')
parser.add_argument('-o','--output', required=True,
                    help='Gviz json output path')
args = parser.parse_args()

dbconfig = args.dbconfig
slack_config = args.slack_config
output = args.output

if __name__=='__main__':
  # Build the slack client outside the try block: the original constructed it
  # inside, so a failing IGF_slack() raised a NameError from the except branch
  # and masked the real error.
  slack_obj = IGF_slack(slack_config=slack_config)
  try:
    # Fetch project pooling stats from db and write them as a gviz json file
    pp = Project_pooling_info(dbconfig_file=dbconfig)
    pp.fetch_db_data_and_prepare_gviz_json(output_file_path=output)
    message = 'Updated project pooling stats'
    slack_obj.\
      post_message_to_channel(
        message=message,
        reaction='pass')
  except Exception as e:
    message = 'Failed to update project pooling stats'
    slack_obj.\
      post_message_to_channel(
        message=message,
        reaction='fail')
    raise ValueError(message) from e                                            # keep the original error chained for debugging
action='append',
                    default=[],
                    help='List of sub directories excluded from the search')
args = parser.parse_args()

# collect parsed command line arguments
seqrun_path = args.seqrun_path
md5_path = args.md5_path
dbconfig_path = args.dbconfig_path
slack_config = args.slack_config
asana_config = args.asana_config
asana_project_id = args.asana_project_id
pipeline_name = args.pipeline_name
exclude_path = args.exclude_path
samplesheet_json_schema = args.samplesheet_json_schema

# messaging clients for run status reporting
slack_obj = IGF_slack(slack_config=slack_config)
asana_obj = IGF_asana(asana_config=asana_config,
                      asana_project_id=asana_project_id)

if __name__ == '__main__':
  try:
    new_seqruns = find_new_seqrun_dir(seqrun_path, dbconfig_path)               # look for new sequencing run directories
    new_seqruns,message = \
      check_for_registered_project_and_sample(
        seqrun_info=new_seqruns,
        dbconfig=dbconfig_path)                                                 # filter runs to registered projects/samples; message lists problems
    if message != '':                                                           # non-empty message means some runs had unregistered metadata
      msg_tmp_dir = get_temp_dir()                                              # create temp dir
      time_tuple = datetime.now().timetuple()                                   # get timetuple for NOW
      time_stamp = \
        '{0}_{1}_{2}-{3}_{4}_{5}'.\
help='Update existing flowcell rules data, default: False')
parser.add_argument('-d', '--dbconfig_path', required=True,
                    help='Database configuration json file')
parser.add_argument('-s', '--slack_config', required=True,
                    help='Slack configuration json file')
args = parser.parse_args()

# collect parsed command line arguments
dbconfig_path = args.dbconfig_path
slack_config = args.slack_config
flowcell_data = args.flowcell_data
update_data = args.update

slack_obj = IGF_slack(slack_config=slack_config)                                # slack client for status reporting

if __name__ == '__main__':
  try:
    if update_data:
      # updating existing flowcell rules is not supported yet
      raise NotImplementedError(
        'methods notavailable for updaing existing data')
    else:
      load_new_flowcell_data(data_file=flowcell_data,
                             dbconfig=dbconfig_path)                            # load new flowcell rules entries to db
  except Exception as e:
    message = 'Failed to load data to flowcell rules table, error: {0}'.format(
      e)
    slack_obj.post_message_to_channel(message, reaction='fail')                 # report failure to slack before re-raising
    raise ValueError(message)
parser = argparse.ArgumentParser()
parser.add_argument('-p', '--seqrun_data', required=True,
                    help='Seqrun data json file')
parser.add_argument('-d', '--dbconfig_path', required=True,
                    help='Database configuration json file')
parser.add_argument('-s', '--slack_config', required=True,
                    help='Slack configuration json file')
args = parser.parse_args()

# collect parsed command line arguments
dbconfig_path = args.dbconfig_path
slack_config = args.slack_config
seqrun_data = args.seqrun_data

slack_obj = IGF_slack(slack_config=slack_config)                                # slack client for status reporting

if __name__ == '__main__':
  try:
    load_new_seqrun_data(data_file=seqrun_data,
                         dbconfig=dbconfig_path)                                # load new seqrun entries to db
  except Exception as e:
    message = 'Failed to load data to seqrun table, error: {0}'.format(e)
    slack_obj.post_message_to_channel(message, reaction='fail')
    raise ValueError(message) from e                                            # chain the original exception explicitly
  else:
    slack_obj.post_message_to_channel(
      message='Loaded new seqrun info to db', reaction='pass')
class Reset_samplesheet_md5:
  '''
  A class for modifying samplesheet md5 for seqrun data processing
  '''
  def __init__(self, seqrun_path, seqrun_igf_list, dbconfig_file,
               clean_up=True, json_collection_type='ILLUMINA_BCL_MD5',
               log_slack=True, log_asana=True, slack_config=None,
               asana_project_id=None, asana_config=None,
               samplesheet_name='SampleSheet.csv'):
    '''
    :param seqrun_path: A directory path for sequencing run home
    :param seqrun_igf_list: A file path listing sequencing runs to reset
    :param dbconfig_file: A file containing the database configuration
    :param clean_up: Clean up input file once its processed, default True
    :param json_collection_type: A collection type for md5 json file lookup,
                                 default ILLUMINA_BCL_MD5
    :param log_slack: A boolean flag for toggling Slack messages, default True
    :param log_asana: A boolean flag for toggling Asana message, default True
    :param slack_config: A file containing Slack tokens, default None
    :param asana_config: A file containing Asana tokens, default None
    :param asana_project_id: A numeric Asana project id, default is None
    :param samplesheet_name: Name of the samplesheet file, default SampleSheet.csv
    '''
    try:
      self.seqrun_path = seqrun_path
      self.seqrun_igf_list = seqrun_igf_list
      self.json_collection_type = json_collection_type
      self.log_slack = log_slack
      self.log_asana = log_asana
      self.clean_up = clean_up
      self.samplesheet_name = samplesheet_name
      dbparams = read_dbconf_json(dbconfig_file)
      self.base_adaptor = BaseAdaptor(**dbparams)
      if log_slack and slack_config is None:
        raise ValueError('Missing slack config file')
      elif log_slack and slack_config:
        self.igf_slack = IGF_slack(slack_config)                                # add slack object
      if log_asana and \
         (asana_config is None or \
          asana_project_id is None):
        raise ValueError(
          'Missing asana config file or asana project id')
      elif log_asana and asana_config and asana_project_id:
        self.igf_asana = IGF_asana(
          asana_config, asana_project_id)                                       # add asana object
    except:
      raise

  def _get_samplesheet_md5(self, seqrun_igf_id):
    '''
    An internal method for calculating md5 value for updated samplesheet file

    :param seqrun_igf_id: A string of seqrun_igf_id
    :return string: MD5 value of the samplesheet.csv file
    '''
    try:
      samplesheet_path = os.path.join(self.seqrun_path,
                                      seqrun_igf_id,
                                      self.samplesheet_name)
      if not os.path.exists(samplesheet_path):
        raise IOError('Samplesheet not found for seqrun {0}'.\
                      format(seqrun_igf_id))
      return calculate_file_checksum(filepath=samplesheet_path, hasher='md5')
    except:
      raise

  @staticmethod
  def _get_updated_json_file(json_file_path,
                             samplesheet_md5,
                             samplesheet_name,
                             file_field='seqrun_file_name',
                             md5_field='file_md5'):
    '''
    A static method for checking samplesheet md5 value in json file and create
    a new copy of json file with updated md5, if samplesheet has changed

    :param json_file_path: A file path for seqrun md5 json file
    :param samplesheet_md5: A md5 value for samplesheet file
    :param samplesheet_name: Name of the samplesheet file
    :param file_field: A keyword for filename look up in json file,
                       default seqrun_file_name
    :param md5_field: A keyword for md5 value look up in json file,
                      default file_md5
    :returns: A string filepath if samplesheet has been updated or None
    '''
    try:
      if not os.path.exists(json_file_path):
        raise IOError(
          'Json md5 file {0} not found'.format(json_file_path))
      create_new_file = False                                                   # don't create new json by default
      json_data = list()
      with open(json_file_path, 'r') as jp:
        json_data = json.load(jp)                                               # load data from json file
      for json_row in json_data:
        if json_row[file_field]==samplesheet_name and \
           json_row[md5_field]!=samplesheet_md5:
          json_row[md5_field] = samplesheet_md5                                 # update json data with new md5
          create_new_file = True                                                # create new json if md5 values are not matching
          break                                                                 # stop file look up
      if create_new_file:
        temp_dir = get_temp_dir()
        json_file_name = os.path.basename(json_file_path)                       # get original json filename
        temp_json_file = os.path.join(temp_dir, json_file_name)                 # get temp file path
        with open(temp_json_file, 'w') as jwp:
          json.dump(json_data, jwp, indent=4)                                   # write data to temp file
        return temp_json_file                                                   # return file path
      else:
        return None                                                             # return none
    except:
      raise

  def run(self):
    '''
    A method for resetting md5 values in the samplesheet json files
    for all seqrun ids
    '''
    try:
      db_connected = False
      seqrun_list = self._read_seqrun_list(
        self.seqrun_igf_list)                                                   # fetch list of seqrun ids from input file
      if len(seqrun_list) > 0:
        base = self.base_adaptor
        base.start_session()                                                    # connect to database
        db_connected = True
        ca = CollectionAdaptor(**{'session': base.session})                     # connect to collection table
        fa = FileAdaptor(**{'session': base.session})                           # connect to file table
        for seqrun_id in seqrun_list:
          try:
            files_data = ca.get_collection_files(
              collection_name=seqrun_id,
              collection_type=self.json_collection_type,
              output_mode='one_or_none')                                        # check for existing md5 json file in db
            # TO DO: skip seqrun_id if pipeline is still running
            if files_data is not None:
              json_file_path = [
                element.file_path
                for element in files_data
                if isinstance(element, File)][0]                                # get md5 json file path from sqlalchemy collection results
              samplesheet_md5 = self._get_samplesheet_md5(
                seqrun_id)                                                      # get md5 value for new samplesheet file
              new_json_path = self._get_updated_json_file(
                json_file_path,
                samplesheet_md5,
                self.samplesheet_name)                                          # get updated md5 json file if samplesheet has been changed
              if new_json_path is not None:
                new_json_file_md5 = calculate_file_checksum(
                  filepath=new_json_path, hasher='md5')
                fa.update_file_table_for_file_path(
                  file_path=json_file_path,
                  tag='md5',
                  value=new_json_file_md5,
                  autosave=False)                                               # update json file md5 in db, don't commit yet
                # NOTE(review): 'destinationa_path' appears to match move_file's
                # parameter spelling — verify against its signature
                move_file(source_path=new_json_path,
                          destinationa_path=json_file_path,
                          force=True)                                           # overwrite json file
                base.commit_session()                                           # save changes in db
                message='Setting new Samplesheet info for run {0}'.\
                        format(seqrun_id)
                if self.log_slack:
                  self.igf_slack.post_message_to_channel(
                    message, reaction='pass')                                   # send log to slack
                if self.log_asana:
                  self.igf_asana.comment_asana_task(
                    task_name=seqrun_id, comment=message)                       # send log to asana
              else:
                message = 'no change in samplesheet for seqrun {0}'.format(
                  seqrun_id)
                warnings.warn(message)
                if self.log_slack:
                  self.igf_slack.post_message_to_channel(
                    message, reaction='pass')
            else:
              message='No md5 json file found for seqrun_igf_id: {0}'.\
                      format(seqrun_id)
              warnings.warn(message)                                            # not raising any exception if seqrun id is not found
              if self.log_slack:
                self.igf_slack.post_message_to_channel(
                  message, reaction='fail')
          except Exception as e:
            base.rollback_session()                                             # per-run failure: undo and continue with next seqrun
            message='Failed to update json file for seqrun id {0}, error : {1}'.\
                    format(seqrun_id,e)
            warnings.warn(message)
            if self.log_slack:
              self.igf_slack.post_message_to_channel(
                message, reaction='fail')
        base.close_session()                                                    # close db connection
        if self.clean_up:
          self._clear_seqrun_list(
            self.seqrun_igf_list)                                               # clear input file
      else:
        # NOTE(review): message is only assigned when log_slack is set, and
        # the inner log_slack check is redundant — confirm the intended scoping
        if self.log_slack:
          message = 'No new seqrun id found for changing samplesheet md5'
          warnings.warn(message)
          if self.log_slack:
            self.igf_slack.post_message_to_channel(
              message, reaction='sleep')
    except:
      if db_connected:
        base.rollback_session()
        base.close_session()
      raise

  @staticmethod
  def _clear_seqrun_list(seqrun_igf_list):
    '''
    A static method for clearing the seqrun list file

    :param seqrun_igf_list: A file containing the sequencing run ids
    '''
    try:
      if not os.path.exists(seqrun_igf_list):
        raise IOError('File {0} not found'.format(seqrun_igf_list))
      with open(seqrun_igf_list, 'w') as fwp:
        fwp.write('')                                                           # over write seqrun list file
    except:
      raise

  @staticmethod
  def _read_seqrun_list(seqrun_igf_list):
    '''
    A static method for reading list of sequencing run ids from an input file
    to a list

    :param seqrun_igf_list: A file containing the sequencing run ids
    :return list: A list of seqrun ids from the input file
    '''
    try:
      if not os.path.exists(seqrun_igf_list):
        raise IOError('File {0} not found'.format(seqrun_igf_list))
      seqrun_ids = list()                                                       # define an empty list of seqrun ids
      with open(seqrun_igf_list, 'r') as fp:
        seqrun_ids = [i.strip() for i in fp]                                    # add seqrun ids to the list
      return seqrun_ids
    except:
      raise
class Experiment_metadata_updator:
  '''
  A class for updating metadata for experiment table in database
  '''
  def __init__(self,dbconfig_file,log_slack=True,slack_config=None):
    '''
    :param dbconfig_file: A database configuration file path
    :param log_slack: A boolean flag for toggling Slack messages, default True
    :param slack_config: A file containing Slack tokens, default None
    '''
    try:
      dbparams = read_dbconf_json(dbconfig_file)
      self.base_adaptor=BaseAdaptor(**dbparams)
      self.log_slack=log_slack
      if log_slack and slack_config is None:
        raise ValueError('Missing slack config file')
      elif log_slack and slack_config:
        self.igf_slack = IGF_slack(slack_config)                                # add slack object
    except:
      raise

  @staticmethod
  def _text_sum(a=None):
    '''
    An internal helper joining list values with ';'

    :param a: A list or a scalar value
    :returns: ';'-joined string for a list input, otherwise the input unchanged
    '''
    if isinstance(a,list):
      return ';'.join(a)
    else:
      return a

  def update_metadta_from_sample_attribute(self,experiment_igf_id=None,
                                           sample_attribute_names=('library_source',
                                                                   'library_strategy',
                                                                   'experiment_type')):
    '''
    A method for fetching experiment metadata from sample_attribute tables

    :param experiment_igf_id: An experiment igf id for updating only a selected
                              experiment, default None for all experiments
    :param sample_attribute_names: A list of sample attribute names to look for
                                   experiment metadata, default: library_source,
                                   library_strategy, experiment_type
    '''
    try:
      sample_attribute_names = list(sample_attribute_names)
      db_connected=False
      base=self.base_adaptor
      base.start_session()
      db_connected=True
      # select experiments with all-UNKNOWN metadata that have usable
      # sample attribute records to copy from
      query=base.session.\
            query(Experiment.experiment_igf_id).\
            distinct(Experiment.experiment_id).\
            join(Sample).\
            join(Sample_attribute).\
            filter(Sample.sample_id==Experiment.sample_id).\
            filter(Sample.sample_id==Sample_attribute.sample_id).\
            filter(Experiment.library_source=='UNKNOWN').\
            filter(Experiment.library_strategy=='UNKNOWN').\
            filter(Experiment.experiment_type=='UNKNOWN').\
            filter(Sample_attribute.attribute_value.notin_(['UNKNOWN'])).\
            filter(Sample_attribute.attribute_name.in_(sample_attribute_names)) # base query for db lookup; notin_ requires a list, not a bare string
      if experiment_igf_id is not None:
        query=query.filter(Experiment.experiment_igf_id==experiment_igf_id)     # look for specific experiment_igf_id
      exp_update_count=0
      exps=base.fetch_records(query, output_mode='object')                      # fetch exp records as generator expression
      ea=ExperimentAdaptor(**{'session':base.session})                          # one adaptor reused for all experiments (was re-built per row)
      for row in exps:
        experiment_id=row[0]
        attributes=ea.fetch_sample_attribute_records_for_experiment_igf_id(
          experiment_igf_id=experiment_id,
          output_mode='object',
          attribute_list=sample_attribute_names)
        exp_update_data=dict()
        for attribute_row in attributes:
          exp_update_data.update(
            {attribute_row.attribute_name:attribute_row.attribute_value})
        if len(exp_update_data.keys())>0:
          exp_update_count+=1
          ea.update_experiment_records_by_igf_id(
            experiment_igf_id=experiment_id,
            update_data=exp_update_data,
            autosave=False)                                                     # update experiment entry if attribute records are found
      base.commit_session()
      base.close_session()
      db_connected=False
      if self.log_slack:
        message='Update {0} experiments from sample attribute records'.\
                format(exp_update_count)
        self.igf_slack.post_message_to_channel(message=message,
                                               reaction='pass')
    except Exception as e:
      if db_connected:
        base.rollback_session()
        base.close_session()
      message='Error while updating experiment records: {0}'.format(e)
      warnings.warn(message)
      if self.log_slack:
        self.igf_slack.post_message_to_channel(message=message,
                                               reaction='fail')
      raise
asana_config = args.asana_config
asana_project_id = args.asana_project_id
pipeline_name = args.pipeline_name
fastq_type = args.fastq_type
project_name_file = args.project_name_file
species_name = args.species_name
library_source = args.library_source
reset_project_list = args.reset_project_list

if __name__=='__main__':
  try:
    if not os.path.exists(project_name_file):
      raise IOError('File {0} not found'.\
                    format(project_name_file))
    slack_obj = IGF_slack(slack_config=slack_config)                            # get slack instance
    asana_obj = IGF_asana(asana_config=asana_config,
                          asana_project_id=asana_project_id)                    # get asana object
    available_projects,seeded_projects = \
      find_new_analysis_seeds(
        dbconfig_path=dbconfig_path,
        pipeline_name=pipeline_name,
        project_name_file=project_name_file,
        species_name_list=species_name,
        fastq_type=fastq_type,
        library_source_list=library_source)                                     # look up projects ready for analysis seeding
    if available_projects is not None:
      message = 'New projects available for seeding: {0}'.\
                format(available_projects)
      slack_obj.\
        post_message_to_channel(