Ejemplo n.º 1
0
    def load(self, attNames):
        '''Load the given attributes into the Nexus master dictionary.

        May be called several times during the lifetime of the object.
        Names already present in self._loadedAttNames are not appended a
        second time, and updateMasterDict is only called for names that
        have no entry in self._attName_id_value yet.  The values of every
        name in attNames are (re)read from the data file on each call.

        attNames -- sequence of attribute names (it is iterated more than
                    once); each name must be a key of
                    self._attName_columnPosition.

        When self._packetInfo is set, reading starts at the position given
        by self._packetInfo[0] and stops at the first row whose id equals
        self._packetInfo[1] (that row is not loaded).
        '''

        # Remember which attributes have been requested so far.  Plain
        # loops are used because these calls are made for their side
        # effects only.
        for attName in attNames:
            if attName not in self._loadedAttNames:
                self._loadedAttNames.append(attName)

        # Make an entry in the master dictionary for attributes lacking one.
        for attName in attNames:
            if attName not in self._attName_id_value:
                self.updateMasterDict([attName])

        dataFile = cgFile.cgFile(self._dataFileName)
        try:
            # Skip ahead to the start of the packet, if one is set.
            if self._packetInfo:
                dataFile.seekToLineStart(self._packetInfo[0])

            # Transcribe values line by line.
            currentID = 0
            for line in dataFile.file:
                ls = line.strip().split('\t')

                # The id is the first field when the file carries ids,
                # otherwise it is a running counter starting at 0.
                if self.hasIDs:
                    rowID = int(ls[0])
                else:
                    rowID = currentID
                    currentID += 1

                # Stop at the end of the packet range.
                if self._packetInfo and rowID == self._packetInfo[1]:
                    break

                # Note: copying is slow (about 10x according to the
                # original author), so list defaults are sliced and only
                # other defaults go through copy().
                for attName in attNames:
                    id_value = self._attName_id_value[attName]
                    colPosition = self._attName_columnPosition[attName]
                    if colPosition < self.numSlots:
                        if ls[colPosition] != '.':
                            # A real value is present: cast it from text.
                            id_value[rowID] = self._attName_casteFromFxn[attName](ls[colPosition])
                        elif 'List' in self._attName__formatInfo[attName][1]:
                            # '.' placeholder for a list type: give each
                            # row its own copy of the default list.
                            id_value[rowID] = self._attName_defaultValue[attName][:]
                        else:
                            id_value[rowID] = copy(self._attName_defaultValue[attName])
                    else:
                        # Column is beyond self.numSlots: store the default
                        # itself (original note: no need for copy on
                        # primitive types).
                        id_value[rowID] = self._attName_defaultValue[attName]
        finally:
            # Release the file handle even if seeking or parsing fails.
            dataFile.file.close()
Ejemplo n.º 2
0
    def save(self, outFN=None):
        '''Write the current values of the selected attributes to a file.

        Each line of self._dataFileName is read, the fields of every
        attribute in self._selectedAttNames are replaced through
        lineUpdate, and the resulting lines are written out in one go.

        outFN -- output file name; defaults to self._dataFileName.  When
                 self._packetInfo is set, '.range.<start>.<end>' is
                 appended to the name and only the rows from
                 self._packetInfo[0] up to (not including) the row whose id
                 equals self._packetInfo[1] are written.

        When self._packetInfo or self._splitRunFlag is set, a file named
        outFN + '.exitSignal' containing 'DONE' is written afterwards as an
        exit signal for parallel processes.
        '''
        if outFN is None:
            outFN = self._dataFileName

        if self._packetInfo:
            outFN += '.range.%s.%s' % (self._packetInfo[0],
                                       self._packetInfo[1])

        # Create the new file contents.
        newLines = []
        dataFile = cgFile.cgFile(self._dataFileName)
        try:
            if self._packetInfo:
                dataFile.seekToLineStart(self._packetInfo[0])

            currentID = 0
            for line in dataFile.file:
                ls = line.strip().split('\t')

                # The id is the first field when the file carries ids,
                # otherwise it is a running counter starting at 0.
                if self.hasIDs:
                    rowID = int(ls[0])
                else:
                    rowID = currentID
                    currentID += 1

                # Stop at the end of the packet range.
                if self._packetInfo and rowID == self._packetInfo[1]:
                    break

                # TODO: lineUpdate with multiple injections
                for attName in self._selectedAttNames:
                    newVal = self._attName_casteToFxn[attName](
                        self._attName_id_value[attName][rowID])
                    ls = lineUpdate(ls, newVal,
                                    self._attName_columnPosition[attName])

                # Only one new line, however many attributes were updated.
                newLines.append('%s\n' % '\t'.join(ls))
        finally:
            # Close the input even when a row fails to convert.
            dataFile.file.close()

        # A single write operation (the original author notes this might
        # cause less I/O clogging).
        with open(outFN, 'w') as outFile:
            outFile.write(''.join(newLines))

        # Exit signal for parallel processes.
        if self._packetInfo or self._splitRunFlag:
            with open(outFN + '.exitSignal', 'w') as signalFile:
                signalFile.write('DONE')
Ejemplo n.º 3
0
    def save(self, outFN = None):
        '''Write the current values of the loaded attributes to a file.

        Each line of self._dataFileName is read, the (column position,
        value) pairs of every attribute in self._loadedAttNames are handed
        to lineUpdate in a single call, and the resulting lines are written
        out in one go.

        outFN -- output file name; defaults to self._dataFileName.  When
                 self._packetInfo is set, '.range.<start>.<end>' is
                 appended to the name and only the rows from
                 self._packetInfo[0] up to (not including) the row whose id
                 equals self._packetInfo[1] are written.

        When self._packetInfo or self._splitRunFlag is set, a file named
        outFN + '.exitSignal' containing 'DONE' is written afterwards as an
        exit signal for parallel processes.
        '''
        if outFN is None:
            outFN = self._dataFileName

        if self._packetInfo:
            outFN += '.range.%s.%s' % (self._packetInfo[0], self._packetInfo[1])

        # Create the new file contents.
        newLines = []
        dataFile = cgFile.cgFile(self._dataFileName)
        try:
            if self._packetInfo:
                dataFile.seekToLineStart(self._packetInfo[0])

            currentID = 0
            for line in dataFile.file:
                ls = line.strip().split('\t')

                # The id is the first field when the file carries ids,
                # otherwise it is a running counter starting at 0.
                if self.hasIDs:
                    rowID = int(ls[0])
                else:
                    rowID = currentID
                    currentID += 1

                # Stop at the end of the packet range.
                if self._packetInfo and rowID == self._packetInfo[1]:
                    break

                # Collect every update for this row and apply them together.
                colPos__vals = []
                for attName in self._loadedAttNames:
                    textVal = self._attName_casteToFxn[attName](self._attName_id_value[attName][rowID])
                    colPos__vals.append((self._attName_columnPosition[attName], textVal))
                ls = lineUpdate(ls, colPos__vals)

                # Only one new line, however many attributes were updated.
                newLines.append('%s\n' % '\t'.join(ls))
        finally:
            # Close the input even when a row fails to convert.
            dataFile.file.close()

        # TODO have to write as you update...test if causes I/O clogging.
        # Possible scheme: update line --> write to temp file --> rename
        # the file once writing is complete.
        # For now everything goes out in a single write operation.
        with open(outFN, 'w') as outFile:
            outFile.write(''.join(newLines))

        # Exit signal for parallel processes.
        if self._packetInfo or self._splitRunFlag:
            with open(outFN + '.exitSignal', 'w') as signalFile:
                signalFile.write('DONE')
Ejemplo n.º 4
0
        def save(self, outFN = None):
            '''Write the current values of the selected attributes to a file.

            Each line of self._dataFileName is read, the fields of every
            attribute in self._selectedAttNames are replaced through
            lineUpdate, and the resulting lines are written out in one go.

            outFN -- output file name; defaults to self._dataFileName.
                     When self._packetInfo is set, '.range.<start>.<end>'
                     is appended to the name and only the rows from
                     self._packetInfo[0] up to (not including) the row
                     whose id equals self._packetInfo[1] are written.

            When self._packetInfo or self._splitRunFlag is set, a file
            named outFN + '.exitSignal' containing 'DONE' is written
            afterwards as an exit signal for parallel processes.

            The body is indented with spaces only; the previous mix of tabs
            and spaces only parsed when a tab counted as eight columns.
            '''
            if outFN is None:
                outFN = self._dataFileName

            if self._packetInfo:
                outFN += '.range.%s.%s' % (self._packetInfo[0], self._packetInfo[1])

            # Create the new file contents.
            newLines = []
            dataFile = cgFile.cgFile(self._dataFileName)
            try:
                if self._packetInfo:
                    dataFile.seekToLineStart(self._packetInfo[0])

                currentID = 0
                for line in dataFile.file:
                    ls = line.strip().split('\t')

                    # The id is the first field when the file carries ids,
                    # otherwise it is a running counter starting at 0.
                    if self.hasIDs:
                        rowID = int(ls[0])
                    else:
                        rowID = currentID
                        currentID += 1

                    # Stop at the end of the packet range.
                    if self._packetInfo and rowID == self._packetInfo[1]:
                        break

                    # TODO: lineUpdate with multiple injections
                    for attName in self._selectedAttNames:
                        newVal = self._attName_casteToFxn[attName](self._attName_id_value[attName][rowID])
                        ls = lineUpdate(ls, newVal, self._attName_columnPosition[attName])

                    # Only one new line, however many attributes were updated.
                    newLines.append('%s\n' % '\t'.join(ls))
            finally:
                # Close the input even when a row fails to convert.
                dataFile.file.close()

            # A single write operation (the original author notes this
            # might cause less I/O clogging).
            with open(outFN, 'w') as outFile:
                outFile.write(''.join(newLines))

            # Exit signal for parallel processes.
            if self._packetInfo or self._splitRunFlag:
                with open(outFN + '.exitSignal', 'w') as signalFile:
                    signalFile.write('DONE')
Ejemplo n.º 5
0
    def load(self, attNames, paraInfo=[None, None]):
        '''Load the given attributes from the data file.

        attNames -- sequence of attribute names to load (iterated once per
                    data line); stored as self._selectedAttNames.
        paraInfo -- [runNumber, numberOfRuns].  runNumber is 1-based: the
                    packet used is entry runNumber - 1 of
                    cgFile.getPacketInfo(self._dataFileName, numberOfRuns).
                    The special value ['splitRun', 'splitRun'] sets
                    self._splitRunFlag and is otherwise treated like the
                    default [None, None] (no packet selected here).

        The paraInfo argument is never modified: the values are converted
        to int in local variables, so neither the shared default list nor
        the caller's list is written to.

        When self._packetInfo is set, reading starts at the position given
        by self._packetInfo[0] and stops at the first row whose id equals
        self._packetInfo[1] (that row is not loaded).
        '''
        # Work on a private copy of the argument.
        paraInfo = list(paraInfo)

        if paraInfo == ['splitRun', 'splitRun']:
            self._splitRunFlag = True
            paraInfo = [None, None]  # now treat paraInfo as if there was nothing

        # If running in parallel or on a specific range, mark range info.
        if paraInfo != [None, None]:
            runNumber = int(paraInfo[0])
            numberOfRuns = int(paraInfo[1])
            self._packetInfo = cgFile.getPacketInfo(
                self._dataFileName, numberOfRuns)[runNumber - 1]

        self._selectedAttNames = attNames

        # Get casting and column info.
        self.loadTranscriptionInfo(attNames)

        # Init master dictionaries.
        self.initializeMasterDict()

        # Get number of slots.
        numSlots = self.getNumberOfSlots()

        dataFile = cgFile.cgFile(self._dataFileName)
        try:
            # Skip ahead to the start of the packet, if one is set.
            if self._packetInfo:
                dataFile.seekToLineStart(self._packetInfo[0])

            # Transcribe values line by line.
            currentID = 0
            for line in dataFile.file:
                ls = line.strip().split('\t')

                # The id is the first field when the file carries ids,
                # otherwise it is a running counter starting at 0.
                if self.hasIDs:
                    rowID = int(ls[0])
                else:
                    rowID = currentID
                    currentID += 1

                # Stop at the end of the packet range.
                if self._packetInfo and rowID == self._packetInfo[1]:
                    break

                # Note: lots of copying is slow (about 10x according to
                # the original author); only copy if list?
                for attName in attNames:
                    colPosition = self._attName_columnPosition[attName]
                    if colPosition < numSlots and ls[colPosition] != '.':
                        # A real value is present: cast it from text.
                        value = self._attName_casteFromFxn[attName](
                            ls[colPosition])
                    else:
                        # '.' placeholder, or a column beyond numSlots:
                        # store a copy of the default value.
                        value = copy(self._attName_defaultValue[attName])
                    self._attName_id_value[attName][rowID] = value
        finally:
            # Release the file handle even if seeking or parsing fails.
            dataFile.file.close()

        # Bind attribute names to dictionaries.
        self.bindAttributes(attNames)

        # Bind id attribute to first attribute, they all have the same ids.
        self.linkIDsToColumn()
Ejemplo n.º 6
0
	def load(self, attNames, paraInfo = [None, None]):
		'''Load the given attributes from the data file.

		attNames -- sequence of attribute names to load (iterated once per
		            data line); stored as self._selectedAttNames.
		paraInfo -- [runNumber, numberOfRuns].  runNumber is 1-based: the
		            packet used is entry runNumber - 1 of
		            cgFile.getPacketInfo(self._dataFileName, numberOfRuns).
		            The special value ['splitRun', 'splitRun'] sets
		            self._splitRunFlag and is otherwise treated like the
		            default [None, None] (no packet selected here).

		The paraInfo argument is never modified: the values are converted
		to int in local variables, so neither the shared default list nor
		the caller's list is written to.

		When self._packetInfo is set, reading starts at the position given
		by self._packetInfo[0] and stops at the first row whose id equals
		self._packetInfo[1] (that row is not loaded).

		Block structure is indented with tabs only, matching the def line;
		the previous mix of tabs and spaces only parsed when a tab counted
		as eight columns.
		'''
		# Work on a private copy of the argument.
		paraInfo = list(paraInfo)

		if paraInfo == ['splitRun', 'splitRun']:
			self._splitRunFlag = True
			paraInfo = [None, None] # now treat paraInfo as if there was nothing

		# If running in parallel or on a specific range, mark range info.
		if paraInfo != [None, None]:
			runNumber = int(paraInfo[0])
			numberOfRuns = int(paraInfo[1])
			self._packetInfo = cgFile.getPacketInfo(self._dataFileName, numberOfRuns)[runNumber - 1]

		self._selectedAttNames = attNames

		# Get casting and column info.
		self.loadTranscriptionInfo(attNames)

		# Init master dictionaries.
		self.initializeMasterDict()

		# Get number of slots.
		numSlots = self.getNumberOfSlots()

		dataFile = cgFile.cgFile(self._dataFileName)
		try:
			# Skip ahead to the start of the packet, if one is set.
			if self._packetInfo:
				dataFile.seekToLineStart(self._packetInfo[0])

			# Transcribe values line by line.
			currentID = 0
			for line in dataFile.file:
				ls = line.strip().split('\t')

				# The id is the first field when the file carries ids,
				# otherwise it is a running counter starting at 0.
				if self.hasIDs:
					rowID = int(ls[0])
				else:
					rowID = currentID
					currentID += 1

				# Stop at the end of the packet range.
				if self._packetInfo and rowID == self._packetInfo[1]:
					break

				# Note: lots of copying is slow (about 10x according to
				# the original author); only copy if list?
				for attName in attNames:
					colPosition = self._attName_columnPosition[attName]
					if colPosition < numSlots and ls[colPosition] != '.':
						# A real value is present: cast it from text.
						value = self._attName_casteFromFxn[attName](ls[colPosition])
					else:
						# '.' placeholder, or a column beyond numSlots:
						# store a copy of the default value.
						value = copy(self._attName_defaultValue[attName])
					self._attName_id_value[attName][rowID] = value
		finally:
			# Release the file handle even if seeking or parsing fails.
			dataFile.file.close()

		# Bind attribute names to dictionaries.
		self.bindAttributes(attNames)

		# Bind id attribute to first attribute, they all have the same ids.
		self.linkIDsToColumn()