def extract_seq(args):
    ''' Extract the sequences from a sequence file and write them one per line.

        When a Shock node ID is given, the source file is first downloaded from
        Shock to the path given by sourceFile.  Every sequence that is written
        is counted, and the destination file is removed again when fewer than
        minReads sequences were written.

        @param args Dictionary with the following keys:
            nodeId: Shock node ID of the source file, or None to skip the download
            shockUrl: URL passed to the Shock client (only used when nodeId is set)
            auth: Second argument passed to the Shock client (only used when nodeId is set)
            sourceFile: Path to the sequence file to read
            format: Format name passed to SeqIO.parse()
            destFile: Path to the output file
            sequenceLen: When greater than zero, sequences shorter than this are
                skipped and longer sequences are trimmed to this length
            maxReads: When greater than zero, stop after writing this many sequences
            minReads: When greater than zero, minimum number of sequences that must
                be written for the output file to be kept
        @return 0
    '''

    # Download the file from Shock to the working directory.
    if args['nodeId'] is not None:
        shockClient = ShockClient(args['shockUrl'], args['auth'])
        shockClient.download_to_path(args['nodeId'], args['sourceFile'])

    sequenceLen = args['sequenceLen']
    maxReads = args['maxReads']

    # Extract the sequences from the source file.  A single loop handles every
    # combination of options so that numReads is always an accurate count (the
    # minReads check below depends on it even when no limit is specified).
    numReads = 0
    with open(args['destFile'], 'w') as f:
        for seqRecord in SeqIO.parse(args['sourceFile'], args['format']):
            seq = str(seqRecord.seq)
            if sequenceLen > 0:
                # A length to trim to was specified: drop reads that are too
                # short and cut the rest down to exactly that length.
                if len(seq) < sequenceLen:
                    continue
                seq = seq[:sequenceLen]
            f.write(seq + '\n')
            numReads += 1
            # numReads is at least 1 here, so a maxReads of zero or less never
            # matches and all of the sequences are written.
            if numReads == maxReads:
                break

    # Delete the file if it does not have enough reads.
    if args['minReads'] > 0 and numReads < args['minReads']:
        os.remove(args['destFile'])
    return 0
def loadDatabaseFiles(self, mylog):
    ''' Load the static database files from Shock.

        The static database files are stored in the directory specified by the
        data_folder_path configuration variable.  A file is only downloaded if
        the file is not available on this system or the file has been updated
        in Shock.  The Shock node of every downloaded file is recorded in the
        cache file so the checksum can be compared on the next call.

        @param mylog Log object for messages
        @return Nothing
        @raise MissingFileError when database file is not found in Shock
    '''

    # Get the current info about the static database files from the cache file.
    cacheFilename = self.StatusFiles['cache_file']
    if os.path.exists(cacheFilename):
        with open(cacheFilename, "r") as handle:
            fileCache = json.load(handle)
    else:
        fileCache = dict()

    # Create a shock client.
    shockClient = ShockClient(self.shockURL)

    # Merge the two file tables (a search file wins when a key is in both).
    # Copy-then-update gives the same result as concatenating the item lists
    # but does not depend on items() returning a list.
    shockFiles = dict(self.DataFiles)
    shockFiles.update(self.SearchFiles)

    # See if the static database files on this system are up-to-date with files stored in Shock.
    for key in shockFiles:
        # Get info about the file stored in Shock.
        localPath = shockFiles[key]
        name = os.path.basename(localPath)
        nodelist = shockClient.query_node({'lookupname': 'ProbAnnoData/' + name})
        if not nodelist:
            message = "Database file %s is not available from %s\n" % (name, self.shockURL)
            mylog.log_message(log.ERR, message)  # MBM
            raise MissingFileError(message)
        node = nodelist[0]

        # Download the file if it has never been cached, the checksum does not
        # match, or the file is not available on this system.
        download = (
            key not in fileCache
            or node['file']['checksum']['md5'] != fileCache[key]['file']['checksum']['md5']
            or not os.path.exists(localPath)
        )
        if download:
            sys.stderr.write("Downloading %s to %s\n" % (key, localPath))
            shockClient.download_to_path(node["id"], localPath)
            fileCache[key] = node
            mylog.log_message(log.INFO, 'Downloaded %s to %s' % (key, localPath))

    # Save the updated cache file.  The with block closes (and so flushes) the
    # file before returning.
    with open(cacheFilename, "w") as handle:
        json.dump(fileCache, handle, indent=4)
    return
def loadDatabaseFiles(self, mylog):
    ''' Load the static database files from Shock.

        The static database files are stored in the directory specified by the
        data_folder_path configuration variable.  A file is only downloaded if
        the file is not available on this system or the file has been updated
        in Shock.  The Shock node of every downloaded file is recorded in the
        cache file so the checksum can be compared on the next call.

        @param mylog Log object for messages
        @return Nothing
        @raise MissingFileError when database file is not found in Shock
    '''

    # Get the current info about the static database files from the cache file.
    cacheFilename = self.StatusFiles['cache_file']
    if os.path.exists(cacheFilename):
        with open(cacheFilename, "r") as handle:
            fileCache = json.load(handle)
    else:
        fileCache = dict()

    # Create a shock client.
    shockClient = ShockClient(self.shockURL)

    # Merge the two file tables (a search file wins when a key is in both).
    # Copy-then-update gives the same result as concatenating the item lists
    # but does not depend on items() returning a list.
    shockFiles = dict(self.DataFiles)
    shockFiles.update(self.SearchFiles)

    # See if the static database files on this system are up-to-date with files stored in Shock.
    for key in shockFiles:
        # Get info about the file stored in Shock.
        localPath = shockFiles[key]
        name = os.path.basename(localPath)
        nodelist = shockClient.query_node({'lookupname': 'ProbAnnoData/' + name})
        if not nodelist:
            message = "Database file %s is not available from %s\n" % (name, self.shockURL)
            mylog.log_message(log.ERR, message)  # MBM
            raise MissingFileError(message)
        node = nodelist[0]

        # Download the file if it has never been cached, the checksum does not
        # match, or the file is not available on this system.
        download = (
            key not in fileCache
            or node['file']['checksum']['md5'] != fileCache[key]['file']['checksum']['md5']
            or not os.path.exists(localPath)
        )
        if download:
            sys.stderr.write("Downloading %s to %s\n" % (key, localPath))
            shockClient.download_to_path(node["id"], localPath)
            fileCache[key] = node
            mylog.log_message(log.INFO, 'Downloaded %s to %s' % (key, localPath))

    # Save the updated cache file.  The with block closes (and so flushes) the
    # file before returning.
    with open(cacheFilename, "w") as handle:
        json.dump(fileCache, handle, indent=4)
    return
print "Job '%s' has status '%s' and is working on task %s of %s. Check again later." \ %(args.jobID, info['status'], info['total_progress'], info['max_progress']) exit(1) # Show job info. if args.showTimes: print 'Job started at %s and finished at %s' % (info['started'], info['last_update']) # Create a shock client. shockClient = ShockClient(info['results']['shockurl'], ujsClient._headers['AUTHORIZATION']) # Download the output to the specified file and remove the file from shock. try: shockClient.download_to_path(info['results']['shocknodes'][0], args.outputPath) except Exception as e: print 'Error downloading distance matrix from %s: %s' % ( info['results']['shockurl'], e.message) traceback.print_exc(file=sys.stdout) try: shockClient.delete_node(info['results']['shocknodes'][0]) except Exception as e: print 'Error deleting distance matrix file from %s: ' % ( +info['results']['shockurl'], e.message) traceback.print_exc(file=sys.stdout) # Delete the job. ujsClient.delete_job(args.jobID) exit(0)
# Retrieve the result of a finished job: download the first Shock node listed
# in the job results to args.outputPath, then remove the node and the job.
# Exits with status 1 when the job has not completed yet, otherwise 0.

# Check if the job is complete.
if not info['complete']:
    # A single parenthesised argument prints the same text whether print is
    # the Python 2 statement or the Python 3 function.
    print("Job '%s' has status '%s' and is working on task %s of %s. Check again later."
          % (args.jobID, info['status'], info['total_progress'], info['max_progress']))
    exit(1)

# Show job info.
if args.showTimes:
    print('Job started at %s and finished at %s' % (info['started'], info['last_update']))

# Create a shock client.
results = info['results']
shockUrl = results['shockurl']
shockClient = ShockClient(shockUrl, ujsClient._headers['AUTHORIZATION'])

# Download the output to the specified file and remove the file from shock.
# The exception itself is formatted rather than e.message, which is empty for
# exceptions raised with more than one argument (for example IOError).
try:
    shockClient.download_to_path(results['shocknodes'][0], args.outputPath)
except Exception as e:
    print('Error downloading distance matrix from %s: %s' % (shockUrl, e))
    traceback.print_exc(file=sys.stdout)

# NOTE(review): the node and the job are still deleted when the download above
# failed, which may discard the results -- confirm this is intended.
try:
    shockClient.delete_node(results['shocknodes'][0])
except Exception as e:
    # The format string needs one placeholder per argument, and the URL must
    # not be prefixed with a unary plus (that raises TypeError on a string).
    print('Error deleting distance matrix file from %s: %s' % (shockUrl, e))
    traceback.print_exc(file=sys.stdout)

# Delete the job.
ujsClient.delete_job(args.jobID)
exit(0)