def __init__(self):
    # Relies on module-level imports (os, SaveLoad, Timer) from the surrounding script.
    # import logging
    # logging.basicConfig(level=logging.INFO)
    # Vars
    import sys
    self.script_dir = sys.path[0]
    self.config_json = os.path.join(self.script_dir, "config.json")

    # Set up timer/logger
    import datetime
    if not os.path.exists(os.path.join(self.script_dir, 'runner_logs')):
        os.makedirs(os.path.join(self.script_dir, 'runner_logs'))
    self.t = Timer(log_file=os.path.join(
        self.script_dir, 'runner_logs',
        datetime.datetime.now().strftime("%d-%m-%Y_%H:%M:%S")))
    self.t.set_colour(30)

    # Load issues that the bot has already responded to
    self.issue_loader = SaveLoad(os.path.join(self.script_dir, 'responded_issues.json'),
                                 create=True)
    self.responded_issues = set(self.issue_loader.get('responded_issues', default=[], ask=False))

    # Load the config, including the encrypted Redmine API key
    self.loader = SaveLoad(self.config_json, create=True)
    self.redmine_api_key = self.loader.get('redmine_api_key_encrypted', default='none', ask=False)
    # If it's the first run then this will be 'yes'
    self.first_run = self.loader.get('first_run', default='yes', ask=False)
    self.nas_mnt = os.path.normpath(self.loader.get('nasmnt', default="/mnt/nas/", get_type=str))
    self.max_histories = self.loader.get('max_histories', default=6, get_type=int)
    self.seconds_between_redmine_checks = self.loader.get('seconds_between_redmine_checks',
                                                          default=600, get_type=int)
    # Make sure all the arguments are there
    self.loader.get('workflow_id', default="f2db41e1fa331b3e")
    self.loader.get('ip', default="http://192.168.1.3:48888/")
    self.key = 'Sixteen byte key'
    self.redmine = None
    try:
        self.main()
    except Exception:
        import traceback
        self.t.time_print("[Error] Dumping...\n%s" % traceback.format_exc())
        raise
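# Illustrative config.json read/written by the get() calls above (the key names and
# defaults come from the code; the encrypted API key is filled in by main()):
# {
#     "redmine_api_key_encrypted": "none",
#     "first_run": "yes",
#     "nasmnt": "/mnt/nas/",
#     "max_histories": 6,
#     "seconds_between_redmine_checks": 600,
#     "workflow_id": "f2db41e1fa331b3e",
#     "ip": "http://192.168.1.3:48888/"
# }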
def __init__(self, inputs=None, args=None):
    # Relies on module-level imports (os, sys, SaveLoad, SequenceGetter, Timer) and
    # on the module-level argparse parser from the surrounding script.
    self.missing = list()
    self.script_dir = sys.path[0]
    load = SaveLoad(file_name="config.json", create=True)
    if inputs is not None:
        # Automated run: inputs come from the Redmine bot
        self.automated = True
        self.inputs = inputs
        self.name = os.path.split(inputs['outputfolder'])[-1]
        nasmnt = load.get('nasmnt', default='/mnt/nas/')
        self.outfolder = inputs['outputfolder']
    elif args is not None:
        # Manual run: inputs come from the command line
        self.automated = False
        self.name = os.path.normpath(args.output_folder)
        # If bad arguments
        if not args.fastq and not args.fasta:
            print("Please use -q (fastq) and/or -f (fasta) to choose what filetype you want to retrieve.")
            parser.print_help()  # module-level parser
            exit(1)
        # Load NAS directory
        if args.nas is not None:
            nasmnt = args.nas
        else:
            nasmnt = load.get('nasmnt', default='/mnt/nas/')
        # Resolve relative output folders against the script directory
        if not os.path.normpath(self.name).startswith('/'):
            self.outfolder = os.path.join(self.script_dir, self.name)
        else:
            self.outfolder = self.name
    else:
        raise ValueError('No inputs to constructor!')
    self.retriever = SequenceGetter(outputfolder=self.outfolder,
                                    nasmnt=os.path.normpath(nasmnt), output=False)
    import datetime
    if not os.path.exists(os.path.join(self.script_dir, 'extractor_logs')):
        os.makedirs(os.path.join(self.script_dir, 'extractor_logs'))
    self.t = Timer(log_file=os.path.join(
        self.script_dir, 'extractor_logs',
        datetime.datetime.now().strftime("%d-%m-%Y_%H:%M:%S")))
    self.t.set_colour(32)
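# Hypothetical module-level argparse setup matching the attributes used above.
# The original parser definition is not shown; -q and -f come from the error
# message in the constructor, while the --nas flag name is an assumption:
# import argparse
# parser = argparse.ArgumentParser()
# parser.add_argument('output_folder')
# parser.add_argument('-f', '--fasta', action='store_true')
# parser.add_argument('-q', '--fastq', action='store_true')
# parser.add_argument('-n', '--nas', default=None)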
def load(self):
    from pyaccessories.SaveLoad import SaveLoad
    config = SaveLoad(os.path.join(self.script_dir, "config.json"), create=True)
    self.API_KEY = config.get('api_key')
    if not re.match(r"^\w{32}$", self.API_KEY):
        self.t.time_print("Invalid Galaxy API key.")
        exit(1)
    self.WORKFLOW_ID = config.get('workflow_id', default='f2db41e1fa331b3e')  # SNVPhyl paired end
    if not re.match(r"^\w{16}$", self.WORKFLOW_ID):
        self.t.time_print("Invalid workflow ID format.")
        exit(1)
    self.IP = config.get('ip', default="http://192.168.1.3:48888/")
    self.NASMNT = os.path.normpath(config.get('nasmnt', default="/mnt/nas/"))
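# Illustrative config.json for this loader (placeholder values shaped by the
# validation above: a 32-character Galaxy API key and a 16-character workflow ID):
# {
#     "api_key": "0123456789abcdef0123456789abcdef",
#     "workflow_id": "f2db41e1fa331b3e",
#     "ip": "http://192.168.1.3:48888/",
#     "nasmnt": "/mnt/nas/"
# }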
import base64
import os
# Project-local helpers (SaveLoad, Timer, RedmineInterface, AutoSNVPhyl) are imported
# at module level in the original script; their exact import paths are not shown here.


class Run(object):
    def main(self, force):
        if self.first_run == 'yes':
            choice = 'y'
            if force:
                raise ValueError('Need redmine API key!')
        else:
            if force:
                choice = 'n'
            else:
                self.t.time_print("Would you like to set the redmine api key? (y/n)")
                choice = input()
        if choice == 'y':
            self.t.time_print("Enter your redmine api key (will be encrypted to file)")
            self.redmine_api_key = input()
            # Encode and send to json file
            self.loader.redmine_api_key_encrypted = \
                self.encode(self.key, self.redmine_api_key).decode('utf-8')
            self.loader.first_run = 'no'
            self.loader.dump(self.config_json)
        else:
            # Import and decode from file
            self.redmine_api_key = self.decode(self.key, self.redmine_api_key)
        import re
        if not re.match(r'^[a-z0-9]{40}$', self.redmine_api_key):
            self.t.time_print("Invalid Redmine API key!")
            exit(1)
        self.redmine = RedmineInterface('http://redmine.biodiversity.agr.gc.ca/', self.redmine_api_key)
        # self.respond_to_issue(self.redmine.get_issue_data(8983)['issue'], force=True)  # use this to re-run issues
        # exit()
        self.main_loop()

    @staticmethod
    def generate_args(inputs):
        import argparse
        args = argparse.Namespace()
        args.reference = inputs['reference']
        args.history_name = inputs['name']
        args.noextract = False
        args.manual = False  # Change this to True if you want to manually run the SNVPhyl
        return args

    @staticmethod
    def get_input(input_file, redmine_id):
        mode = 'none'
        regex = r"^(2\d{3}-\w{2,10}-\d{3,4}|\d{2}-\d{4})$"  # Match xxxx-xxx-xxxx or xx-xxxx
        inputs = {
            'reference': None,
            'fastqs': list(),
            'name': str(redmine_id)
        }
        import re
        for line in input_file:
            # Check for mode changes
            if line.lower().startswith('reference') and len(line) < len('reference') + 3:
                mode = 'ref'
                continue
            elif line.lower().startswith('compare') and len(line) < len('compare') + 3:
                mode = 'comp'
                continue
            elif line.lower() == '':
                # Blank line
                mode = 'none'
                continue
            if inputs['reference'] is not None and len(inputs['fastqs']) > 0 and mode == 'none':
                # Finished gathering all input
                break
            # Get seq-id
            if mode == 'ref':
                if re.match(regex, line):
                    inputs['reference'] = line
                else:
                    raise ValueError("Invalid seq-id \"%s\"" % line)
            elif mode == 'comp':
                if re.match(regex, line):
                    inputs['fastqs'].append(line)
                else:
                    raise ValueError("Invalid seq-id \"%s\"" % line)
        # Check for duplicates
        seq_ids = inputs['fastqs']
        duplicates = set(x for x in seq_ids if seq_ids.count(x) > 1)
        if len(duplicates) > 0:
            msg = "Duplicate SEQ-IDs!\n"
            for duplicate in duplicates:
                msg += duplicate + '\n'
            raise ValueError(msg)
        if inputs['reference'] is None or len(inputs['fastqs']) < 1:
            raise ValueError("Invalid format for redmine request.")
        return inputs

    def completed_response(self, result_path, redmine_id):
        from RedmineAPI.RedmineAPI import RedmineUploadError
        notes = "Completed running SNVPhyl. Results stored at %s" % \
                os.path.join("NAS/bio_requests/%s" % redmine_id)
        try:
            self.redmine.upload_file(result_path, redmine_id, 'application/zip',
                                     file_name_once_uploaded="SNVPhyl_%s_Results.zip" % redmine_id)
        except RedmineUploadError:
            notes = "Couldn't upload your file to redmine. Results stored at %s" % \
                    os.path.join("NAS/bio_requests/%s" % redmine_id)
        # Assign it back to the author
        get = self.redmine.get_issue_data(redmine_id)
        self.redmine.update_issue(redmine_id, notes + self.botmsg, status_change=4,
                                  assign_to_id=get['issue']['author']['id'])
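    # Illustrative Redmine issue description that get_input() accepts
    # (SEQ-IDs are placeholders matching the regex above):
    #
    #   reference
    #   2017-SEQ-0123
    #
    #   compare
    #   2017-SEQ-0456
    #   2017-SEQ-0789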
Results stored at %s" % \ os.path.join("NAS/bio_requests/%s" % redmine_id) # Assign it back to the author get = self.redmine.get_issue_data(redmine_id) self.redmine.update_issue(redmine_id, notes + self.botmsg, status_change=4, assign_to_id=get['issue']['author']['id']) def run_snvphyl(self, inputs): # Parse input args = self.generate_args(inputs) # noinspection PyBroadException from main import AutoSNVPhylError try: runner = AutoSNVPhyl(args, inputs=inputs) result_path = runner.run() # SNVPhyl finished, copy the zip to the NAS import shutil bio_request_folder = os.path.join(self.nas_mnt, 'bio_requests', inputs['name']) # Create folder with redmine id self.t.time_print("Creating directory %s" % bio_request_folder) if not os.path.exists(os.path.join(bio_request_folder)): os.makedirs(bio_request_folder) # Copy results to bio_request folder self.t.time_print("Copying %s to %s" % (result_path, bio_request_folder)) shutil.copy(result_path, bio_request_folder) # Respond on redmine self.completed_response(result_path, inputs['name']) except Exception as e: import traceback self.t.time_print("[Warning] AutoSNVPhyl had a problem, continuing redmine api anyways.") self.t.time_print("[AutoSNVPhyl Error Dump]\n" + traceback.format_exc()) # Send response if type(e) == AutoSNVPhylError or ValueError: msg = str(e) else: msg = traceback.format_exc() # Set it to feedback and assign it back to the author get = self.redmine.get_issue_data(inputs['name']) self.redmine.update_issue( inputs['name'], notes="There was a problem with your SNVPhyl. Please create a new issue on" " Redmine to re-run it.\n%s" % msg + self.botmsg, status_change=4, assign_to_id=get['issue']['author']['id'] ) def main_loop(self): import time while True: self.clear_space() self.make_call() self.t.time_print("Waiting for next check.") time.sleep(self.seconds_between_redmine_checks) def clear_space(self): from bioblend.galaxy import GalaxyInstance from bioblend import ConnectionError gi = GalaxyInstance(self.loader.get('ip', default='http://192.168.1.3:48888/'), key=self.loader.get('api_key')) self.t.time_print("Clearing space on Galaxy") while True: try: available = gi.histories.get_histories() # Ping galaxy break except ConnectionError as e: if e.status_code == 403: # Invalid API key self.t.time_print("Invalid Galaxy API Key!") del self.loader.__dict__['api_key'] self.loader.dump() self.loader.get('api_key') elif 'Max retries exceeded' in str(e.args[0]): self.t.time_print("Error: Galaxy isn't running/connection error.") self.t.time_print("Waiting 1 hour...") import time time.sleep(3600) else: raise if len(available) >= self.max_histories: msg = 'Clearing data.' else: msg = 'Not clearing data.' self.t.time_print("Currently %d histories on Galaxy. %s" % (len(available), msg)) while len(available) > self.max_histories: self.t.time_print("Deleting history %s to clear space..." 
    def make_call(self):
        self.t.time_print("Checking for SNVPhyl requests...")
        data = self.redmine.get_new_issues('cfia')
        found = []
        for issue in data['issues']:
            if issue['id'] not in self.responded_issues and issue['status']['name'] == 'New':
                if issue['subject'].lower() == 'snvphyl':
                    found.append(issue)
        self.t.time_print("Found %d issues..." % len(found))
        while len(found) > 0:  # While there are still issues to respond to
            self.respond_to_issue(found.pop(len(found) - 1))
            self.clear_space()

    def respond_to_issue(self, issue, force=False):
        # Run SNVPhyl
        if self.redmine.get_issue_data(issue['id'])['issue']['status']['name'] == 'New' or force:
            self.t.time_print("Found SNVPhyl to run. Subject: %s. ID: %s" %
                              (issue['subject'], issue['id']))
            self.t.time_print("Adding to responded to")
            self.responded_issues.add(issue['id'])
            self.issue_loader.responded_issues = list(self.responded_issues)
            self.issue_loader.dump()
            # Turn the description into a list of lines
            input_list = issue['description'].split('\n')
            input_list = map(str.strip, input_list)  # Get rid of \r
            error = False
            try:
                inputs = self.get_input(input_list, issue['id'])
                response = "Running SNVPhyl with reference %s\n\nComparing to:" % inputs['reference']
                for fastq in list(inputs['fastqs']):
                    response += '\n' + fastq
                if inputs['reference'] not in inputs['fastqs']:
                    response += "\nDid you mean to not compare the reference to itself?"  # TODO ask for answer
            except ValueError as e:
                response = "Sorry, there was a problem with your SNVPhyl request:\n%s\n" \
                           "Please submit a new request and close this one." % e.args[0]
                error = True
            if not error:
                # Rename files if a rename.txt text file is included with the request
                more_msg, inputs['rename'] = self.rename_files(issue['id'])
                if more_msg is not None:
                    response += '\n' + more_msg
            self.t.time_print('\n' + response)
            if error:
                # If something went wrong, set the status to feedback and assign the issue
                # back to the author
                get = self.redmine.get_issue_data(issue['id'])
                self.redmine.update_issue(issue['id'], notes=response + self.botmsg, status_change=4,
                                          assign_to_id=get['issue']['author']['id'])
                return
            # Set the issue to in progress since the SNVPhyl is running
            self.redmine.update_issue(issue['id'], notes=response + self.botmsg, status_change=2)
            self.run_snvphyl(inputs)

    def rename_files(self, issue_id):
        self.t.time_print('Looking for txt')
        data = self.redmine.get_issue_data(issue_id)
        try:
            attachments = data['issue']['attachments']
            if len(attachments) > 0:
                self.t.time_print('Found attachment to redmine request.')
            else:
                return None, []
        except KeyError:
            # No attachments
            return None, []
        rename = None
        for attachment in attachments:
            if attachment['filename'].endswith('.txt'):
                # Good
                self.t.time_print('Found %s, downloading...' % attachment['filename'])
                rename = self.redmine.download_file(attachment['content_url'])
                break
        if rename is None:
            return None, []
        import re
        # Matches pairs separated by a comma, tab, or newline, e.g. "seq,a" then "seq2,b"
        regex = r'([^\t\n\r]+)(?:,|\n|\r|\t)([^\t\n\r]+)'
        pairs = re.findall(regex, rename)
        if len(pairs) == 0:
            return 'Invalid rename.txt file.', []
        # Check that it's a good file
        ignore = []
        for pair in pairs:
            # Check for duplicate definitions
            for compare in pairs:
                if pair[0] == compare[0] and pair[1] != compare[1]:
                    return 'Not using rename.txt because of duplicate definition in the rename.txt file', []
                # Otherwise let it go
            # Make sure the new name is valid
            regex = r'.+'  # TODO actual regex
            if not re.fullmatch(regex, pair[1]):
                ignore.append(pair)
        # Feedback
        msg = "Renaming files before starting SNVPhyl."
        if len(ignore) > 0:
            msg += '\nSome names were invalid and were ignored:\n'
            for out in ignore:
                msg += '\n%s' % out[1]
        # Convert to dict
        result = {}
        for pair in pairs:
            result[pair[0]] = pair[1]
        self.t.time_print(result)
        return msg, result

    @staticmethod
    def encode(key, string):
        # Shifts each character by the corresponding key character, then base64-encodes.
        # This is obfuscation, not real encryption.
        encoded_chars = []
        for i in range(len(string)):
            key_c = key[i % len(key)]
            # NB: % binds to ord(key_c) only; decode() mirrors this
            encoded_c = chr(ord(string[i]) + ord(key_c) % 256)
            encoded_chars.append(encoded_c)
        encoded_string = "".join(encoded_chars)
        encoded_string = bytes(encoded_string, "utf-8")
        return base64.urlsafe_b64encode(encoded_string)

    @staticmethod
    def decode(key, string):
        decoded_chars = []
        string = base64.urlsafe_b64decode(string).decode('utf-8')
        for i in range(len(string)):
            key_c = key[i % len(key)]
            encoded_c = chr(abs(ord(string[i]) - ord(key_c) % 256))
            decoded_chars.append(encoded_c)
        decoded_string = "".join(decoded_chars)
        return decoded_string

    def __init__(self, force):
        # import logging
        # logging.basicConfig(level=logging.INFO)
        # Vars
        import sys
        self.script_dir = sys.path[0]
        self.config_json = os.path.join(self.script_dir, "config.json")

        # Set up timer/logger
        import datetime
        if not os.path.exists(os.path.join(self.script_dir, 'runner_logs')):
            os.makedirs(os.path.join(self.script_dir, 'runner_logs'))
        self.t = Timer(log_file=os.path.join(
            self.script_dir, 'runner_logs',
            datetime.datetime.now().strftime("%d-%m-%Y_%H:%M:%S")))
        self.t.set_colour(30)

        # Load issues that the bot has already responded to
        self.issue_loader = SaveLoad(os.path.join(self.script_dir, 'responded_issues.json'),
                                     create=True)
        self.responded_issues = set(self.issue_loader.get('responded_issues', default=[], ask=False))

        # Load the config, including the encrypted Redmine API key
        self.loader = SaveLoad(self.config_json, create=True)
        self.redmine_api_key = self.loader.get('redmine_api_key_encrypted', default='none', ask=False)
        # If it's the first run then this will be 'yes'
        self.first_run = self.loader.get('first_run', default='yes', ask=False)
        self.nas_mnt = os.path.normpath(self.loader.get('nasmnt', default="/mnt/nas/", get_type=str))
        self.max_histories = self.loader.get('max_histories', default=6, get_type=int)
        self.seconds_between_redmine_checks = self.loader.get('seconds_between_redmine_checks',
                                                              default=600, get_type=int)
        # Make sure all the arguments are there
        self.loader.get('workflow_id', default="f2db41e1fa331b3e")
        self.loader.get('ip', default="http://192.168.1.3:48888/")
        self.key = 'Sixteen byte key'
        self.redmine = None
        self.botmsg = '\n\n_I am a bot. This action was performed automatically._'
        try:
            self.main(force)
        except Exception:
            import traceback
            self.t.time_print("[Error] Dumping...\n%s" % traceback.format_exc())
            raise
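# Round-trip sketch for the obfuscation helpers above (illustrative; for ASCII input
# the subtraction in decode() exactly reverses the shift in encode()):
#   token = Run.encode('Sixteen byte key', 'secret')   # -> base64 bytes
#   Run.decode('Sixteen byte key', token)              # -> 'secret'
#
# Hypothetical entry point (the original script's __main__ block is not shown):
# if __name__ == '__main__':
#     Run(force=False)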
import base64
import os
# Project-local helpers (SaveLoad, Timer, RedmineInterface, MassExtractor) are imported
# at module level in the original script; their exact import paths are not shown here.


class Run(object):
    def main(self, force):
        if self.first_run == 'yes':
            choice = 'y'
            if force:
                raise ValueError('Need redmine API key!')
        else:
            if force:
                choice = 'n'
            else:
                self.t.time_print("Would you like to set the redmine api key? (y/n)")
                choice = input()
        if choice == 'y':
            self.t.time_print("Enter your redmine api key (will be encrypted to file)")
            self.redmine_api_key = input()
            # Encode and send to json file
            self.loader.redmine_api_key_encrypted = \
                self.encode(self.key, self.redmine_api_key).decode('utf-8')
            self.loader.first_run = 'no'
            self.loader.dump(self.config_json)
        else:
            # Import and decode from file
            self.redmine_api_key = self.decode(self.key, self.redmine_api_key)
        import re
        if not re.match(r'^[a-z0-9]{40}$', self.redmine_api_key):
            self.t.time_print("Invalid Redmine API key!")
            exit(1)
        self.redmine = RedmineInterface('http://redmine.biodiversity.agr.gc.ca/', self.redmine_api_key)
        self.main_loop()

    def get_input(self, input_file, redmine_id):
        mode = 'none'
        regex = r'^(2\d{3}-\w{2,10}-\d{3,4})$'
        inputs = {
            'fastqs': list(),
            'fastas': list(),
            'outputfolder': os.path.join(self.nas_mnt, 'bio_requests', str(redmine_id))
        }
        import re
        for line in input_file:
            # Check for mode changes
            if line.lower().startswith('fasta') and len(line) < len('fasta') + 3:
                mode = 'fasta'
                continue
            elif line.lower().startswith('fastq') and len(line) < len('fastq') + 3:
                mode = 'fastq'
                continue
            elif line.lower() == '':
                # Blank line
                mode = 'none'
                continue
            # Get seq-id
            if mode == 'fasta':
                if re.match(regex, line):
                    inputs['fastas'].append(line)
                else:
                    raise ValueError("Invalid seq-id \"%s\"" % line)
            elif mode == 'fastq':
                if re.match(regex, line):
                    inputs['fastqs'].append(line)
                else:
                    raise ValueError("Invalid seq-id \"%s\"" % line)
        if len(inputs['fastas']) < 1 and len(inputs['fastqs']) < 1:
            raise ValueError("Invalid format for redmine request. "
                             "Couldn't find any fastas or fastqs to extract")
        return inputs
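    # Illustrative "retrieve" issue description that get_input() accepts
    # (SEQ-IDs are placeholders matching the regex above):
    #
    #   fasta
    #   2017-SEQ-0123
    #
    #   fastq
    #   2017-SEQ-0456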
    def completed_response(self, redmine_id, missing):
        notes = "Completed extracting files. Results stored at %s" % \
                os.path.join("NAS/bio_requests/%s" % redmine_id)
        if len(missing) > 0:
            notes += '\nMissing some files:\n'
            for file in missing:
                notes += file + '\n'
        # Assign it back to the author
        get = self.redmine.get_issue_data(redmine_id)
        self.redmine.update_issue(redmine_id, notes + self.botmsg, status_change=4,
                                  assign_to_id=get['issue']['author']['id'])

    def run_request(self, inputs):
        # Parse input
        # noinspection PyBroadException
        try:
            runner = MassExtractor(inputs=inputs)
            missing_files = runner.run()
            # Respond on redmine
            self.completed_response(os.path.split(inputs['outputfolder'])[-1], missing_files)
        except Exception:
            import traceback
            self.t.time_print("[Warning] run.py had a problem, continuing redmine api anyways.")
            self.t.time_print("[AutoSNVPhyl Error Dump]\n" + traceback.format_exc())
            # Send response
            msg = traceback.format_exc()
            # Set it to feedback and assign it back to the author
            get = self.redmine.get_issue_data(os.path.split(inputs['outputfolder'])[-1])
            self.redmine.update_issue(
                os.path.split(inputs['outputfolder'])[-1],
                notes="There was a problem with your request. Please create a new issue on"
                      " Redmine to re-run it.\n%s" % msg + self.botmsg,
                status_change=4,
                assign_to_id=get['issue']['author']['id'])

    def main_loop(self):
        import time
        while True:
            self.make_call()
            self.t.time_print("Waiting for next check.")
            time.sleep(self.seconds_between_redmine_checks)

    def make_call(self):
        self.t.time_print("Checking for extraction requests...")
        data = self.redmine.get_new_issues('cfia')
        found = []
        for issue in data['issues']:
            if issue['id'] not in self.responded_issues and issue['status']['name'] == 'New':
                if issue['subject'].lower() == 'retrieve':
                    found.append(issue)
        self.t.time_print("Found %d issues..." % len(found))
        while len(found) > 0:  # While there are still issues to respond to
            self.respond_to_issue(found.pop(len(found) - 1))

    def respond_to_issue(self, issue):
        # Run extraction
        if self.redmine.get_issue_data(issue['id'])['issue']['status']['name'] == 'New':
            self.t.time_print("Found extraction to run. Subject: %s. ID: %s" %
                              (issue['subject'], issue['id']))
            self.t.time_print("Adding to responded to")
            self.responded_issues.add(issue['id'])
            self.issue_loader.responded_issues = list(self.responded_issues)
            self.issue_loader.dump()
            # Turn the description into a list of lines
            input_list = issue['description'].split('\n')
            input_list = map(str.strip, input_list)  # Get rid of \r
            error = False
            try:
                inputs = self.get_input(input_list, issue['id'])
                response = "Retrieving %d fastas and %d fastqs..." % \
                           (len(inputs['fastas']), len(inputs['fastqs']))
            except ValueError as e:
                response = "Sorry, there was a problem with your request:\n%s\n" \
                           "Please submit a new request and close this one." % e.args[0]
                error = True
            self.t.time_print('\n' + response)
            if error:
                # If something went wrong, set the status to feedback and assign the issue
                # back to the author
                get = self.redmine.get_issue_data(issue['id'])
                self.redmine.update_issue(issue['id'], notes=response + self.botmsg, status_change=4,
                                          assign_to_id=get['issue']['author']['id'])
                return
            # Set the issue to in progress since the extraction is running
            self.redmine.update_issue(issue['id'], notes=response + self.botmsg, status_change=2)
            self.run_request(inputs)

    @staticmethod
    def encode(key, string):
        # Shifts each character by the corresponding key character, then base64-encodes.
        encoded_chars = []
        for i in range(len(string)):
            key_c = key[i % len(key)]
            encoded_c = chr(ord(string[i]) + ord(key_c) % 256)
            encoded_chars.append(encoded_c)
        encoded_string = "".join(encoded_chars)
        encoded_string = bytes(encoded_string, "utf-8")
        return base64.urlsafe_b64encode(encoded_string)

    @staticmethod
    def decode(key, string):
        decoded_chars = []
        string = base64.urlsafe_b64decode(string).decode('utf-8')
        for i in range(len(string)):
            key_c = key[i % len(key)]
            encoded_c = chr(abs(ord(string[i]) - ord(key_c) % 256))
            decoded_chars.append(encoded_c)
        decoded_string = "".join(decoded_chars)
        return decoded_string

    def __init__(self, force):
        # import logging
        # logging.basicConfig(level=logging.INFO)
        # Vars
        import sys
        self.script_dir = sys.path[0]
        self.config_json = os.path.join(self.script_dir, "config.json")

        # Set up timer/logger
        import datetime
        if not os.path.exists(os.path.join(self.script_dir, 'runner_logs')):
            os.makedirs(os.path.join(self.script_dir, 'runner_logs'))
        self.t = Timer(log_file=os.path.join(
            self.script_dir, 'runner_logs',
            datetime.datetime.now().strftime("%d-%m-%Y_%H:%M:%S")))
        self.t.set_colour(30)

        # Load issues that the bot has already responded to
        self.issue_loader = SaveLoad(os.path.join(self.script_dir, 'responded_issues.json'),
                                     create=True)
        self.responded_issues = set(self.issue_loader.get('responded_issues', default=[], ask=False))

        # Load the config, including the encrypted Redmine API key
        self.loader = SaveLoad(self.config_json, create=True)
        self.redmine_api_key = self.loader.get('redmine_api_key_encrypted', default='none', ask=False)
        # If it's the first run then this will be 'yes'
        self.first_run = self.loader.get('first_run', default='yes', ask=False)
        self.nas_mnt = os.path.normpath(self.loader.get('nasmnt', default="/mnt/nas/", get_type=str))
        self.seconds_between_redmine_checks = self.loader.get('secs_between_redmine_checks',
                                                              default=600, get_type=int)
        self.key = 'Sixteen byte key'
        self.redmine = None
        self.botmsg = '\n\n_I am a bot. This action was performed automatically._'
        try:
            self.main(force)
        except Exception:
            import traceback
            self.t.time_print("[Error] Dumping...\n%s" % traceback.format_exc())
            raise
import base64
import os
# Project-local helpers (SaveLoad, Timer, RedmineInterface) are imported at module
# level in the original script; their exact import paths are not shown here.


class Run(object):
    def main(self, force):
        if self.first_run == 'yes':
            choice = 'y'
            if force:
                raise ValueError('Need redmine API key!')
        else:
            if force:
                choice = 'n'
            else:
                self.t.time_print("Would you like to set the redmine api key? (y/n)")
                choice = input()
        if choice == 'y':
            self.t.time_print("Enter your redmine api key (will be encrypted to file)")
            self.redmine_api_key = input()
            # Encode and send to json file
            self.loader.redmine_api_key_encrypted = \
                self.encode(self.key, self.redmine_api_key).decode('utf-8')
            self.loader.first_run = 'no'
            self.loader.dump(self.config_json)
        else:
            # Import and decode from file
            self.redmine_api_key = self.decode(self.key, self.redmine_api_key)
        import re
        if not re.match(r'^[a-z0-9]{40}$', self.redmine_api_key):
            self.t.time_print("Invalid Redmine API key!")
            exit(1)
        self.redmine = RedmineInterface('http://redmine.biodiversity.agr.gc.ca/', self.redmine_api_key)
        self.main_loop()

    def completed_response(self, redmine_id, missing):
        notes = "Completed extracting files. Results stored at %s" % \
                os.path.join("NAS/bio_requests/%s" % redmine_id)
        if len(missing) > 0:
            notes += '\nMissing some files:\n'
            for file in missing:
                notes += file + '\n'
        # Assign it back to the author
        get = self.redmine.get_issue_data(redmine_id)
        self.redmine.update_issue(redmine_id, notes, status_change=4,
                                  assign_to_id=get['issue']['author']['id'])

    def main_loop(self):
        import time
        while True:
            self.make_call()
            self.t.time_print("Waiting for next check.")
            time.sleep(self.seconds_between_redmine_checks)

    def make_call(self):
        self.t.time_print("Checking for metadata requests...")
        data = self.redmine.get_new_issues('cfia')
        found = []
        import re
        prog = re.compile(r'^assembly&metadata-\d{2}(\d{2}-\d{1,2}-\d{1,2})$')
        for issue in data['issues']:
            if issue['status']['name'] == 'New':
                # Get rid of caps and spaces and match
                subj = ''.join(issue['subject'].lower().split())
                result = re.fullmatch(prog, subj)
                if result:
                    found.append({
                        'id': issue['id'],
                        'folder': ''.join(result.group(1).split('-'))
                    })
        self.t.time_print("Found %d issues..." % len(found))
        while len(found) > 0:  # While there are still issues to respond to
            self.respond_to_issue(found.pop(len(found) - 1))
        # Check on old jobs; iterate over a copy so completed jobs can be removed
        # from the queue while looping
        self.t.time_print("Checking on old issues: ")
        for job in list(self.queue):
            msg = str(job['id']) + ': '
            if self.check_assembly(job['folder']):
                msg += 'Uploading.'
                results_zip = self.retrieve_files(job)
                response = "Retrieved data. Also stored at %s." % results_zip
                self.redmine.upload_file(results_zip, job['id'], 'application/zip',
                                         status_change=4,
                                         additional_notes=response + self.bottext)
                self.queue.remove(job)
            else:
                msg += 'Not ready.'
            self.t.time_print(msg)
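    # Illustrative subject line that make_call() matches after lowercasing and
    # removing whitespace: "Assembly & Metadata-2017-05-23" is normalised to
    # "assembly&metadata-2017-05-23" and queued as folder "170523".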
    def respond_to_issue(self, job):
        # Run extraction
        if self.redmine.get_issue_data(job['id'])['issue']['status']['name'] == 'New':
            self.t.time_print("Found metadata retrieving job to run. ID: %s, folder %s" %
                              (str(job['id']), str(job['folder'])))
            if self.check_assembly(job['folder']):
                self.t.time_print('Uploading files...')
                # Retrieve
                results_zip = self.retrieve_files(job)
                response = "Retrieved data. Also stored at %s." % results_zip
                self.redmine.upload_file(results_zip, job['id'], 'application/zip',
                                         status_change=4,
                                         additional_notes=response + self.bottext)
            else:
                # Not assembled yet; queue the job and check again later
                response = "Waiting for assembly to complete..."
                self.t.time_print(response)
                self.t.time_print("Adding to queue")
                self.queue.append(job)
                self.queue_loader.queue = self.queue
                self.queue_loader.dump()
                # Set the issue to in progress
                self.redmine.update_issue(job['id'], notes=response + self.bottext,
                                          status_change=2)

    def check_assembly(self, datestr):
        directory = os.path.join(self.nas_mnt, 'WGSspades', datestr + '_Assembled')
        return bool(os.path.isdir(directory))

    def retrieve_files(self, job):
        results_folder = os.path.join(self.nas_mnt, 'bio_requests', str(job['id']))
        os.makedirs(results_folder)
        results_zip = os.path.join(results_folder, str(job['id']) + '.zip')
        directory = os.path.join(self.nas_mnt, 'WGSspades', job['folder'] + '_Assembled')
        self.zip_results(os.path.join(directory, 'reports'), results_zip)
        return results_zip

    def zip_results(self, r_folder, outfolder):
        import zipfile
        # Zip all the files
        self.t.time_print("Creating zip file %s" % outfolder)
        try:
            os.remove(outfolder)
        except OSError:
            pass
        zipf = zipfile.ZipFile(outfolder, 'w', zipfile.ZIP_DEFLATED)
        for file in os.listdir(r_folder):
            zipf.write(os.path.join(r_folder, file))
            self.t.time_print("Zipped %s" % file)
        zipf.close()

    @staticmethod
    def encode(key, string):
        # Shifts each character by the corresponding key character, then base64-encodes.
        encoded_chars = []
        for i in range(len(string)):
            key_c = key[i % len(key)]
            encoded_c = chr(ord(string[i]) + ord(key_c) % 256)
            encoded_chars.append(encoded_c)
        encoded_string = "".join(encoded_chars)
        encoded_string = bytes(encoded_string, "utf-8")
        return base64.urlsafe_b64encode(encoded_string)

    @staticmethod
    def decode(key, string):
        decoded_chars = []
        string = base64.urlsafe_b64decode(string).decode('utf-8')
        for i in range(len(string)):
            key_c = key[i % len(key)]
            encoded_c = chr(abs(ord(string[i]) - ord(key_c) % 256))
            decoded_chars.append(encoded_c)
        decoded_string = "".join(decoded_chars)
        return decoded_string

    def __init__(self, force):
        # import logging
        # logging.basicConfig(level=logging.INFO)
        # Vars
        import sys
        self.script_dir = sys.path[0]
        self.config_json = os.path.join(self.script_dir, "config.json")

        # Set up timer/logger
        import datetime
        if not os.path.exists(os.path.join(self.script_dir, 'runner_logs')):
            os.makedirs(os.path.join(self.script_dir, 'runner_logs'))
        self.t = Timer(log_file=os.path.join(
            self.script_dir, 'runner_logs',
            datetime.datetime.now().strftime("%d-%m-%Y_%H:%M:%S")))
        self.t.set_colour(30)

        # Save issues found to a queue (load existing issues if the bot needs to restart)
        self.queue_loader = SaveLoad(os.path.join(self.script_dir, 'queue.json'), create=True)
        self.queue = self.queue_loader.get('queue', default=[], ask=False)

        # Load the config, including the encrypted Redmine API key
        self.loader = SaveLoad(self.config_json, create=True)
        self.redmine_api_key = self.loader.get('redmine_api_key_encrypted', default='none', ask=False)
        # If it's the first run then this will be 'yes'
        self.first_run = self.loader.get('first_run', default='yes', ask=False)
        self.nas_mnt = os.path.normpath(self.loader.get('nasmnt', default="/mnt/nas/", get_type=str))
        self.seconds_between_redmine_checks = self.loader.get('secs_between_redmine_checks',
                                                              default=600, get_type=int)
        self.key = 'Sixteen byte key'
        self.redmine = None
        self.bottext = '\n\n_I am a bot. This action was performed automatically._'
        try:
            self.main(force)
        except Exception:
            import traceback
            self.t.time_print("[Error] Dumping...\n%s" % traceback.format_exc())
            raise
import os
import shutil
import sys
import time
from glob import glob

from Bio import SeqIO
# Pipeline helpers (printtime, make_path, GenObject, getmlst, SaveLoad) are imported
# at module level in the original script; their exact import paths are not shown here.


class UpdateDatabase(object):
    def main(self):
        """Main program: updates the rMLST and MLST databases"""
        print("Main")
        start_time = time.time()
        self.getrmlsthelper(start_time)
        for organism in self.loader.to_update:
            self.getmlsthelper(start_time, organism)

    def getrmlsthelper(self, start):
        """
        Makes a system call to rest_auth.pl, a Perl script modified from
        https://github.com/kjolley/BIGSdb/tree/develop/scripts/test
        and downloads the most up-to-date rMLST profile and alleles
        """
        from subprocess import call
        # Folders are named based on the download date, e.g. 2016-04-26
        # Find all folders (with the trailing / in the glob search) and remove the trailing /
        try:
            lastfolder = sorted(glob('{}{}/2*/'.format(self.referencefilepath,
                                                       self.analysistype)))[-1].rstrip('/')
        except IndexError:
            lastfolder = "2000-01-01"
        delta, foldersize, d1 = self.schemedate(lastfolder)
        # Extract the path of the current script from the full path + file name
        homepath = os.path.split(os.path.abspath(__file__))[0]
        # Set the path/name of the folder to contain the new alleles and profile
        newfolder = '{}{}/{}'.format(self.referencefilepath, self.analysistype, d1)
        # System call
        rmlstupdatecall = 'cd {} && perl {}/rest_auth.pl -a {}/secret.txt'.format(
            newfolder, homepath, homepath)
        if foldersize < 100:
            printtime("Last update of rMLST profile and alleles was {} days ago. Updating"
                      .format(str(delta.days)), start)
            # Create the path
            make_path(newfolder)
            # Copy over the access token to be used in the authentication
            shutil.copyfile('{}/access_token'.format(homepath),
                            '{}/access_token'.format(newfolder))
            # Run rest_auth.pl
            call(rmlstupdatecall, shell=True)
            # Get the new alleles into a list, and create the combinedAlleles file
            alleles = glob('{}/*.tfa'.format(newfolder))
            self.combinealleles(start, newfolder, alleles)
        # If the profile and alleles are up-to-date, set :newfolder to :lastfolder
        else:
            newfolder = lastfolder
        # Ensure that the profile/alleles updated successfully
        # Calculate the size of the folder by adding the sizes of all the files within it
        newfoldersize = sum(os.path.getsize('{}/{}'.format(newfolder, f))
                            for f in os.listdir(newfolder)
                            if os.path.isfile('{}/{}'.format(newfolder, f)))
        # If the profile/allele update failed, remove the folder, and use the most recent update
        if newfoldersize < 100:
            shutil.rmtree(newfolder)
            try:
                newfolder = sorted(glob('{}{}/*/'.format(self.referencefilepath,
                                                         self.analysistype)))[-1].rstrip('/')
            except IndexError:
                pass
        # Return the system call and the folder containing the profile and alleles
        return rmlstupdatecall, newfolder
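    # Reference directory layout assumed above (illustrative):
    #   <referencefilepath>/rMLST/2016-04-26/   <- one date-stamped folder per download
    # so sorted(glob('.../2*/'))[-1] picks the most recent download.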
    def getmlsthelper(self, start, organism):
        """Prepares to run the getmlst.py script provided in SRST2"""
        # Initialise a set for the organism(s) for which new alleles and profiles are desired
        organismset = set()
        # Allow Shigella to use the Escherichia MLST profile/alleles
        organism = organism if organism != 'Shigella' else 'Escherichia'
        # As there are multiple profiles for certain organisms, this dictionary has the
        # schemes I use as values
        # Allow for a genus not in the dictionary being specified
        try:
            organismset.add(self.loader.organismdictionary[organism])
        except KeyError:
            # Add the organism to the set
            organismset.add(organism)
        for scheme in organismset:
            organismpath = os.path.join(self.referencefilepath, 'MLST', organism)
            # Find all folders (with the trailing / in the glob search) and remove the trailing /
            try:
                lastfolder = sorted(glob('{}/*/'.format(organismpath)))[-1].rstrip('/')
            except IndexError:
                lastfolder = []
            # Run the method to determine the most recent folder, and how recently it was updated
            delta, foldersize, d1 = self.schemedate(lastfolder)
            # Set the path/name of the folder to contain the new alleles and profile
            newfolder = '{}/{}'.format(organismpath, d1)
            if foldersize < 100:
                printtime('Downloading {} MLST scheme from pubmlst.org'.format(organism), start)
                # Create the object to store the argument attributes to feed to getmlst
                getmlstargs = GenObject()
                getmlstargs.species = scheme
                getmlstargs.repository_url = 'http://pubmlst.org/data/dbases.xml'
                getmlstargs.force_scheme_name = False
                getmlstargs.path = newfolder
                # Create the path to store the downloaded files
                make_path(getmlstargs.path)
                getmlst.main(getmlstargs)
                # Even if there is an issue contacting the database, files are created; however,
                # they are populated with XML strings indicating that the download failed
                # Read the first character in the file
                try:
                    profilestart = open(glob('{}/*.txt'.format(newfolder))[0]).readline()
                except IndexError:
                    profilestart = []
                # If it is a '<', then the download failed
                if not profilestart or profilestart[0] == '<':
                    # Delete the folder, and use the previous definitions instead
                    shutil.rmtree(newfolder)
                    newfolder = lastfolder
            # If the profile and alleles are up-to-date, set :newfolder to :lastfolder
            else:
                newfolder = lastfolder
            # If update isn't specified, don't update
            # Ensure that the profile/alleles updated successfully
            # Calculate the size of the folder by adding the sizes of all the files within it
            try:
                newfoldersize = sum(os.path.getsize('{}/{}'.format(newfolder, f))
                                    for f in os.listdir(newfolder)
                                    if os.path.isfile('{}/{}'.format(newfolder, f)))
            except (OSError, TypeError):
                newfoldersize = 100
            # If the profile/allele update failed, remove the folder, and use the most recent update
            if newfoldersize < 100:
                shutil.rmtree(newfolder)
                try:
                    newfolder = sorted(glob('{}/*/'.format(organismpath)))[-1].rstrip('/')
                except IndexError:
                    newfolder = organismpath
            # Return the name/path of the allele-containing folder
            return newfolder

    @staticmethod
    def combinealleles(start, allelepath, alleles):
        printtime('Creating combined rMLST allele file', start)
        records = []
        # Open each allele file
        for allele in sorted(alleles):
            # with open(allele, 'rU') as fasta:
            for record in SeqIO.parse(open(allele, "rU"), "fasta"):
                # Extract the sequence record from each entry in the multifasta
                # Replace any dashes in the record.id with underscores
                record.id = record.id.replace('-', '_')
                # Remove any dashes or 'N's from the sequence data; makeblastdb can't handle
                # sequences with gaps
                # noinspection PyProtectedMember
                record.seq._data = record.seq._data.replace('-', '').replace('N', '')
                # Clear the name and description attributes of the record
                record.name = ''
                record.description = ''
                # Write each record to the combined file
                # SeqIO.write(record, combinedfile, 'fasta')
                records.append(record)
        with open('{}/rMLST_combined.fasta'.format(allelepath), 'w') as combinedfile:
            SeqIO.write(records, combinedfile, 'fasta')
lastupdate.split("-") # Create a date object variable with the year, month, and day d0 = date(int(year), int(month), int(day)) except ValueError: # Set an arbitrary date in the past to force an update d0 = date(2000, 1, 1) # Create a date object with the current date d1 = date(int(time.strftime("%Y")), int(time.strftime("%m")), int(time.strftime("%d"))) # Subtract the last update date from the current date delta = d1 - d0 return delta, foldersize, d1 def __init__(self, parser): print("initialising") self.analysistype = "rMLST" # self.referencefilepath = "/mnt/nas/Adam/assemblypipeline/rMLST/" self.referencefilepath = os.path.join(parser.referencedirectory, "") self.start = parser.start self.loader = SaveLoad() # If the file was empty and it couldn't load but created the file import json try: # Fresh file if not self.loader.load("bacteria.json", True): self.loader.organismdictionary = {'Escherichia': 'Escherichia coli#1', 'Shigella': 'Escherichia coli#1', 'Vibrio': 'Vibrio parahaemolyticus', 'Campylobacter': 'Campylobacter jejuni', 'Listeria': 'Listeria monocytogenes', 'Bacillus': 'Bacillus cereus', 'Klebsiella': 'Klebsiella pneumoniae'} self.loader.to_update = list(self.loader.organismdictionary.keys()) self.loader.dump("bacteria.json") if "organismdictionary" not in self.loader.__dict__: raise NameError except (json.decoder.JSONDecodeError, NameError): print("Invalid config file, please delete or fix") sys.exit(1) self.main()