예제 #1
0
def extract_seq(args):
    ''' Extract sequences from a sequence file into a text file, one per line.

        When args['nodeId'] is not None the source file is first downloaded
        from Shock to args['sourceFile'].  The source file is then parsed with
        SeqIO.parse() and every kept sequence is written on its own line to
        args['destFile'].

        Keys read from args:
            nodeId, shockUrl, auth  Shock download (skipped when nodeId is None)
            sourceFile, format      input path and format given to SeqIO.parse()
            destFile                output path
            sequenceLen             when > 0, reads shorter than this are
                                    skipped and longer reads are trimmed to it
            maxReads                when > 0, stop after this many reads
            minReads                when > 0, destFile is removed if fewer
                                    reads than this were written

        @param args Dictionary of arguments (see above)
        @return 0
    '''

    # Download the file from Shock to the working directory.
    if args['nodeId'] is not None:
        shockClient = ShockClient(args['shockUrl'], args['auth'])
        shockClient.download_to_path(args['nodeId'], args['sourceFile'])

    # Extract the sequences from the source file.  A single loop handles all
    # modes so that numReads is counted in every one of them; previously the
    # "no trimming, no maximum" mode never counted reads, which made the
    # minReads check below always delete the output file.
    sequenceLen = args['sequenceLen']
    maxReads = args['maxReads']
    numReads = 0
    with open(args['destFile'], 'w') as f:
        trimming = sequenceLen > 0  # A length to trim to was specified
        for seqRecord in SeqIO.parse(args['sourceFile'], args['format']):
            seq = str(seqRecord.seq)
            if trimming:
                if len(seq) < sequenceLen:
                    continue
                seq = seq[:sequenceLen]
            f.write(seq + '\n')
            numReads += 1
            # numReads is at least 1 here, so a maxReads of 0 (or less)
            # never matches and means "no limit".
            if numReads == maxReads:
                break

    # Delete the file if it does not have enough reads.
    if args['minReads'] > 0 and numReads < args['minReads']:
        os.remove(args['destFile'])
    return 0
예제 #2
0
파일: Helpers.py 프로젝트: kbase/cbd
def extract_seq(args):
    ''' Write the sequences of a sequence file to a text file, one per line.

        The source file is downloaded from Shock to args['sourceFile'] first
        when args['nodeId'] is not None.  It is then read with SeqIO.parse()
        using args['format'], and each kept sequence is written as one line
        of args['destFile'].

        Options read from args:
            sequenceLen  when > 0, reads shorter than this length are dropped
                         and longer reads are cut down to it
            maxReads     when > 0, at most this many reads are written
            minReads     when > 0, args['destFile'] is deleted again if fewer
                         reads than this were written

        @param args Dictionary with the keys nodeId, shockUrl, auth,
            sourceFile, destFile, format, sequenceLen, maxReads, minReads
        @return 0
    '''

    # Download the file from Shock to the working directory.
    if args['nodeId'] is not None:
        shockClient = ShockClient(args['shockUrl'], args['auth'])
        shockClient.download_to_path(args['nodeId'], args['sourceFile'])

    # Extract the sequences from the source file.  Reads are counted no matter
    # which options are set: the former separate loop for "no sequenceLen and
    # no maxReads" left numReads at 0, so a positive minReads always removed
    # the freshly written file.
    trimLen = args['sequenceLen']
    readLimit = args['maxReads']
    numReads = 0
    with open(args['destFile'], 'w') as out:
        trimming = trimLen > 0  # A length to trim to was specified
        for record in SeqIO.parse(args['sourceFile'], args['format']):
            seq = str(record.seq)
            if trimming and len(seq) < trimLen:
                continue
            if trimming:
                seq = seq[:trimLen]
            out.write(seq + '\n')
            numReads += 1
            # A limit of 0 or less can never equal a count of 1 or more, so
            # it leaves the number of reads unlimited.
            if numReads == readLimit:
                break

    # Delete the file if it does not have enough reads.
    if args['minReads'] > 0 and numReads < args['minReads']:
        os.remove(args['destFile'])
    return 0
예제 #3
0
    def loadDatabaseFiles(self, mylog):
        ''' Load the static database files from Shock.

            The static database files are stored in the directory specified by the
            data_folder_path configuration variable.  A file is only downloaded if
            the file is not available on this system or the file has been updated
            in Shock.

            The Shock node of every downloaded file is recorded in the JSON cache
            file named by self.StatusFiles['cache_file']; the md5 checksum stored
            there is what later calls compare against.

            @param mylog Log object for messages
            @return Nothing
            @raise MissingFileError when database file is not found in Shock
        '''

        # Get the current info about the static database files from the cache file.
        cacheFilename = self.StatusFiles['cache_file']
        if os.path.exists(cacheFilename):
            # Use a context manager so the handle is closed right away.
            with open(cacheFilename, "r") as handle:
                fileCache = json.load(handle)
        else:
            fileCache = dict()

        # Create a shock client.
        shockClient = ShockClient(self.shockURL)

        # See if the static database files on this system are up-to-date with files stored in Shock.
        # Copy-then-update merges both tables without concatenating items()
        # results, which only works where items() returns a list.
        shockFiles = dict(self.DataFiles.items())
        shockFiles.update(self.SearchFiles.items())
        for key in shockFiles:
            # Get info about the file stored in Shock.
            localPath = shockFiles[key]
            name = os.path.basename(localPath)
            nodelist = shockClient.query_node(
                {'lookupname': 'ProbAnnoData/' + name})
            if len(nodelist) == 0:
                message = "Database file %s is not available from %s\n" % (
                    name, self.shockURL)
                mylog.log_message(log.ERR, message)  # MBM
                raise MissingFileError(message)
            node = nodelist[0]

            # Download the file if the checksum does not match or the file is not available on this system.
            if key not in fileCache:
                download = True
            else:
                cachedMd5 = fileCache[key]['file']['checksum']['md5']
                download = node['file']['checksum']['md5'] != cachedMd5
            if not os.path.exists(localPath):
                download = True
            if download:
                sys.stderr.write("Downloading %s to %s\n" % (key, localPath))
                shockClient.download_to_path(node["id"], localPath)
                fileCache[key] = node
                mylog.log_message(log.INFO,
                                  'Downloaded %s to %s' % (key, localPath))

        # Save the updated cache file.  Closing the handle explicitly makes
        # sure the JSON is flushed to disk before the method returns.
        with open(cacheFilename, "w") as handle:
            json.dump(fileCache, handle, indent=4)
        return
예제 #4
0
    def loadDatabaseFiles(self, mylog):
        ''' Load the static database files from Shock.

            The static database files are stored in the directory specified by the
            data_folder_path configuration variable.  A file is only downloaded if
            the file is not available on this system or the file has been updated
            in Shock.

            For each downloaded file the Shock node is saved in the JSON cache
            file self.StatusFiles['cache_file'], and the md5 checksum in that
            saved node is compared with the one reported by Shock on later calls.

            @param mylog Log object for messages
            @return Nothing
            @raise MissingFileError when database file is not found in Shock
        '''

        # Get the current info about the static database files from the cache file.
        cacheFilename = self.StatusFiles['cache_file']
        fileCache = dict()
        if os.path.exists(cacheFilename):
            # The with-block closes the cache file as soon as it is parsed.
            with open(cacheFilename, "r") as cacheFile:
                fileCache = json.load(cacheFile)

        # Create a shock client.
        shockClient = ShockClient(self.shockURL)

        # See if the static database files on this system are up-to-date with files stored in Shock.
        # The two tables are merged with update() rather than by adding the
        # items() results together, which fails when items() is not a list.
        shockFiles = dict(self.DataFiles.items())
        shockFiles.update(self.SearchFiles.items())
        for key in shockFiles:
            # Get info about the file stored in Shock.
            localPath = shockFiles[key]
            name = os.path.basename(localPath)
            nodelist = shockClient.query_node( { 'lookupname': 'ProbAnnoData/'+name } )
            if len(nodelist) == 0:
                message = "Database file %s is not available from %s\n" %(name, self.shockURL)
                mylog.log_message(log.ERR, message) # MBM
                raise MissingFileError(message)
            node = nodelist[0]

            # Download the file if the checksum does not match or the file is not available on this system.
            download = True
            if key in fileCache:
                download = node['file']['checksum']['md5'] != fileCache[key]['file']['checksum']['md5']
            if not os.path.exists(localPath):
                download = True
            if download:
                sys.stderr.write("Downloading %s to %s\n" %(key, localPath))
                shockClient.download_to_path(node["id"], localPath)
                fileCache[key] = node
                mylog.log_message(log.INFO, 'Downloaded %s to %s' %(key, localPath))

        # Save the updated cache file.  The with-block closes the file so the
        # JSON is completely written out before returning.
        with open(cacheFilename, "w") as cacheFile:
            json.dump(fileCache, cacheFile, indent=4)
        return
예제 #5
0
        print "Job '%s' has status '%s' and is working on task %s of %s.  Check again later." \
            %(args.jobID, info['status'], info['total_progress'], info['max_progress'])
        exit(1)

    # Show job info.
    if args.showTimes:
        print('Job started at %s and finished at %s' % (info['started'],
                                                        info['last_update']))

    # Create a shock client.
    shockClient = ShockClient(info['results']['shockurl'],
                              ujsClient._headers['AUTHORIZATION'])

    # Download the output to the specified file.
    try:
        shockClient.download_to_path(info['results']['shocknodes'][0],
                                     args.outputPath)
    except Exception as e:
        print('Error downloading distance matrix from %s: %s' % (
            info['results']['shockurl'], str(e)))
        traceback.print_exc(file=sys.stdout)
        # Stop here and report failure: the node and the job are left in
        # place so the download can be retried.  Carrying on would delete
        # the only copy of the result.
        sys.exit(1)

    # The output is stored locally now, so remove the file from shock.  A
    # failure here is only reported; the job is still deleted below.
    try:
        shockClient.delete_node(info['results']['shocknodes'][0])
    except Exception as e:
        # The message needs one placeholder per value and no unary '+' on
        # the URL; otherwise this handler raises TypeError itself.
        print('Error deleting distance matrix file from %s: %s' % (
            info['results']['shockurl'], str(e)))
        traceback.print_exc(file=sys.stdout)

    # Delete the job.
    ujsClient.delete_job(args.jobID)

    sys.exit(0)
예제 #6
0
파일: cbd-getmatrix.py 프로젝트: kbase/cbd
    # Check if the job is complete.
    if not info['complete']:
        print("Job '%s' has status '%s' and is working on task %s of %s.  Check again later."
              % (args.jobID, info['status'], info['total_progress'], info['max_progress']))
        sys.exit(1)

    # Show job info.
    if args.showTimes:
        print('Job started at %s and finished at %s' % (info['started'], info['last_update']))

    # Create a shock client.
    shockClient = ShockClient(info['results']['shockurl'], ujsClient._headers['AUTHORIZATION'])

    # Download the output to the specified file.
    try:
        shockClient.download_to_path(info['results']['shocknodes'][0], args.outputPath)
    except Exception as e:
        print('Error downloading distance matrix from %s: %s' % (info['results']['shockurl'], str(e)))
        traceback.print_exc(file=sys.stdout)
        # Exit with a failure status and keep the node and the job, so the
        # result is not deleted before it has been downloaded successfully.
        sys.exit(1)

    # The download succeeded, so remove the file from shock.  Problems with
    # the removal are reported but do not stop the job from being deleted.
    try:
        shockClient.delete_node(info['results']['shocknodes'][0])
    except Exception as e:
        # Two placeholders for two values, and no unary '+' in front of the
        # URL: the old expression raised TypeError inside this handler.
        print('Error deleting distance matrix file from %s: %s' % (info['results']['shockurl'], str(e)))
        traceback.print_exc(file=sys.stdout)

    # Delete the job.
    ujsClient.delete_job(args.jobID)

    sys.exit(0)