def load(self, attNames):
    """Load the given attributes into the Nexus master dictionaries.

    May be called several times during the lifetime of a Nexus: names that
    are already registered are not registered again, but the values of every
    name in ``attNames`` are (re-)read from ``self._dataFileName``.

    Args:
        attNames: re-iterable sequence of attribute names to transcribe.

    Notes:
        * A field equal to '.' is replaced by a copy of the attribute's
          default value.
        * When ``self._packetInfo`` is set, reading starts at the position
          selected by ``seekToLineStart(self._packetInfo[0])`` and stops at
          the first row whose ID equals ``self._packetInfo[1]``.
        * The data file is closed even if a row fails to parse.
    """
    # Remember which attributes have been requested so far (no duplicates).
    for attName in attNames:
        if attName not in self._loadedAttNames:
            self._loadedAttNames.append(attName)

    # Create master-dictionary entries for attributes seen for the first time.
    for attName in attNames:
        if attName not in self._attName_id_value:
            self.updateMasterDict([attName])

    # Open the file and, for a packet, skip straight to its first line.
    dataFile = cgFile.cgFile(self._dataFileName)
    try:
        if self._packetInfo:
            dataFile.seekToLineStart(self._packetInfo[0])

        currentID = 0
        for line in dataFile.file:
            ls = line.strip().split('\t')

            # The ID is the first column when the file carries IDs,
            # otherwise rows are numbered in reading order.
            if self.hasIDs:
                rowID = int(ls[0])
            else:
                rowID = currentID
                currentID += 1

            # Stop at the end of the packet range.
            if self._packetInfo and rowID == self._packetInfo[1]:
                break

            # NOTE: copy() is slow (~10x per the original author), so list
            # defaults are slice-copied and copy() is only a fallback.
            for attName in attNames:
                colPosition = self._attName_columnPosition[attName]
                if colPosition < self.numSlots:
                    field = ls[colPosition]
                    if field != '.':
                        value = self._attName_casteFromFxn[attName](field)
                    elif 'List' in self._attName__formatInfo[attName][1]:
                        value = self._attName_defaultValue[attName][:]
                    else:
                        value = copy(self._attName_defaultValue[attName])
                else:
                    # Column not present in the file: use the default.
                    # Primitives can be shared as-is, but a list default
                    # must be copied or every row would alias one list.
                    value = self._attName_defaultValue[attName]
                    if isinstance(value, list):
                        value = value[:]
                self._attName_id_value[attName][rowID] = value
    finally:
        dataFile.file.close()
def save(self, outFN=None):
    """Write the in-memory values of the selected attributes back to disk.

    Every row of ``self._dataFileName`` (or only the packet range when
    ``self._packetInfo`` is set) is re-read, the columns belonging to
    ``self._selectedAttNames`` are replaced through ``lineUpdate``, and the
    resulting rows are written to the output file in a single write.

    Args:
        outFN: output file name. Defaults to ``self._dataFileName``. When
            ``self._packetInfo`` is set, ``'.range.<start>.<end>'`` is
            appended to the name.

    Side effects:
        When ``self._packetInfo`` or ``self._splitRunFlag`` is set, a file
        named ``outFN + '.exitSignal'`` containing ``'DONE'`` is written as
        the exit signal for parallel processes.
    """
    if outFN is None:
        outFN = self._dataFileName
    if self._packetInfo:
        outFN += '.range.%s.%s' % (self._packetInfo[0], self._packetInfo[1])

    # Build the new file contents in memory. The input is fully read and
    # closed before anything is written, because outFN may be the same file.
    newLines = []
    dataFile = cgFile.cgFile(self._dataFileName)
    try:
        if self._packetInfo:
            dataFile.seekToLineStart(self._packetInfo[0])

        currentID = 0
        for line in dataFile.file:
            ls = line.strip().split('\t')

            if self.hasIDs:
                rowID = int(ls[0])
            else:
                rowID = currentID
                currentID += 1

            # Stop at the end of the packet range.
            if self._packetInfo and rowID == self._packetInfo[1]:
                break

            # TODO: lineUpdate with multiple injections
            for attName in self._selectedAttNames:
                newVal = self._attName_casteToFxn[attName](
                    self._attName_id_value[attName][rowID])
                ls = lineUpdate(ls, newVal,
                                self._attName_columnPosition[attName])

            # One output line per row, however many attributes were updated.
            newLines.append('%s\n' % '\t'.join(ls))
    finally:
        dataFile.file.close()

    # A single write operation might cause less I/O clogging.
    with open(outFN, 'w') as outFile:
        outFile.write(''.join(newLines))

    # Exit signal for parallel processes.
    if self._packetInfo or self._splitRunFlag:
        with open(outFN + '.exitSignal', 'w') as signalFile:
            signalFile.write('DONE')
def save(self, outFN=None):
    """Write the in-memory values of the loaded attributes back to disk.

    Every row of ``self._dataFileName`` (or only the packet range when
    ``self._packetInfo`` is set) is re-read, the columns belonging to
    ``self._loadedAttNames`` are replaced with one ``lineUpdate`` call per
    row, and the resulting rows are written to the output file in a single
    write.

    Args:
        outFN: output file name. Defaults to ``self._dataFileName``. When
            ``self._packetInfo`` is set, ``'.range.<start>.<end>'`` is
            appended to the name.

    Side effects:
        When ``self._packetInfo`` or ``self._splitRunFlag`` is set, a file
        named ``outFN + '.exitSignal'`` containing ``'DONE'`` is written as
        the exit signal for parallel processes.
    """
    if outFN is None:
        outFN = self._dataFileName
    if self._packetInfo:
        outFN += '.range.%s.%s' % (self._packetInfo[0], self._packetInfo[1])

    # Build the new file contents in memory. The input is fully read and
    # closed before anything is written, because outFN may be the same file.
    newLines = []
    dataFile = cgFile.cgFile(self._dataFileName)
    try:
        if self._packetInfo:
            dataFile.seekToLineStart(self._packetInfo[0])

        currentID = 0
        for line in dataFile.file:
            ls = line.strip().split('\t')

            if self.hasIDs:
                rowID = int(ls[0])
            else:
                rowID = currentID
                currentID += 1

            # Stop at the end of the packet range.
            if self._packetInfo and rowID == self._packetInfo[1]:
                break

            # Collect (column position, serialized value) for every loaded
            # attribute and inject them into the row in one call.
            colPos__vals = [
                (self._attName_columnPosition[attName],
                 self._attName_casteToFxn[attName](
                     self._attName_id_value[attName][rowID]))
                for attName in self._loadedAttNames
            ]
            ls = lineUpdate(ls, colPos__vals)

            # One output line per row, however many attributes were updated.
            newLines.append('%s\n' % '\t'.join(ls))
    finally:
        dataFile.file.close()

    # TODO: write while updating (update line --> temp file --> rename once
    # complete) and test whether that causes I/O clogging.
    # For now a single write operation might cause less clogging.
    with open(outFN, 'w') as outFile:
        outFile.write(''.join(newLines))

    # Exit signal for parallel processes.
    if self._packetInfo or self._splitRunFlag:
        with open(outFN + '.exitSignal', 'w') as signalFile:
            signalFile.write('DONE')
def save(self, outFN=None):
    """Write the in-memory values of the selected attributes back to disk.

    Every row of ``self._dataFileName`` (or only the packet range when
    ``self._packetInfo`` is set) is re-read, the columns belonging to
    ``self._selectedAttNames`` are replaced through ``lineUpdate``, and the
    resulting rows are written to the output file in a single write.

    Args:
        outFN: output file name. Defaults to ``self._dataFileName``. When
            ``self._packetInfo`` is set, ``'.range.<start>.<end>'`` is
            appended to the name.

    Side effects:
        When ``self._packetInfo`` or ``self._splitRunFlag`` is set, a file
        named ``outFN + '.exitSignal'`` containing ``'DONE'`` is written as
        the exit signal for parallel processes.
    """
    if outFN is None:
        outFN = self._dataFileName
    if self._packetInfo:
        outFN += '.range.%s.%s' % (self._packetInfo[0], self._packetInfo[1])

    # Build the new file contents in memory. The input is fully read and
    # closed before anything is written, because outFN may be the same file.
    newLines = []
    dataFile = cgFile.cgFile(self._dataFileName)
    try:
        if self._packetInfo:
            dataFile.seekToLineStart(self._packetInfo[0])

        currentID = 0
        for line in dataFile.file:
            ls = line.strip().split('\t')

            if self.hasIDs:
                rowID = int(ls[0])
            else:
                rowID = currentID
                currentID += 1

            # Stop at the end of the packet range.
            if self._packetInfo and rowID == self._packetInfo[1]:
                break

            # TODO: lineUpdate with multiple injections
            for attName in self._selectedAttNames:
                newVal = self._attName_casteToFxn[attName](
                    self._attName_id_value[attName][rowID])
                ls = lineUpdate(ls, newVal,
                                self._attName_columnPosition[attName])

            # One output line per row, however many attributes were updated.
            newLines.append('%s\n' % '\t'.join(ls))
    finally:
        dataFile.file.close()

    # A single write operation might cause less I/O clogging.
    with open(outFN, 'w') as outFile:
        outFile.write(''.join(newLines))

    # Exit signal for parallel processes.
    if self._packetInfo or self._splitRunFlag:
        with open(outFN + '.exitSignal', 'w') as signalFile:
            signalFile.write('DONE')
def load(self, attNames, paraInfo=None):
    """Read the given attributes from the data file into the master dicts.

    Args:
        attNames: re-iterable sequence of attribute names to load.
        paraInfo: ``[runNumber, numberOfRuns]`` for a parallel run (both
            values are converted with ``int``), ``['splitRun', 'splitRun']``
            to only set ``self._splitRunFlag``, or ``None`` /
            ``[None, None]`` for a plain full load. The caller's list is
            never modified.

    Notes:
        * A field equal to '.', or a column position at or beyond the
          number of slots, yields a copy of the attribute's default value.
        * When ``self._packetInfo`` is set, reading starts at the position
          selected by ``seekToLineStart(self._packetInfo[0])`` and stops at
          the first row whose ID equals ``self._packetInfo[1]``.
        * The data file is closed even if a row fails to parse.
    """
    if paraInfo is None:
        paraInfo = [None, None]

    if paraInfo == ['splitRun', 'splitRun']:
        self._splitRunFlag = True
        paraInfo = [None, None]  # from here on, behave as a plain load

    # If running parallel or on a specific range, record the range info.
    if paraInfo != [None, None]:
        runNumber = int(paraInfo[0])
        numberOfRuns = int(paraInfo[1])
        self._packetInfo = cgFile.getPacketInfo(
            self._dataFileName, numberOfRuns)[runNumber - 1]

    self._selectedAttNames = attNames

    # Casting and column info, master dictionaries, number of slots.
    self.loadTranscriptionInfo(attNames)
    self.initializeMasterDict()
    numSlots = self.getNumberOfSlots()

    # Open the file and, for a packet, skip straight to its first line.
    dataFile = cgFile.cgFile(self._dataFileName)
    try:
        if self._packetInfo:
            dataFile.seekToLineStart(self._packetInfo[0])

        currentID = 0
        for line in dataFile.file:
            ls = line.strip().split('\t')

            # The ID is the first column when the file carries IDs,
            # otherwise rows are numbered in reading order.
            if self.hasIDs:
                rowID = int(ls[0])
            else:
                rowID = currentID
                currentID += 1

            # Stop at the end of the packet range.
            if self._packetInfo and rowID == self._packetInfo[1]:
                break

            # NOTE: lots of copying is slow (~10x per the original author).
            for attName in attNames:
                colPosition = self._attName_columnPosition[attName]
                if colPosition < numSlots and ls[colPosition] != '.':
                    value = self._attName_casteFromFxn[attName](
                        ls[colPosition])
                else:
                    value = copy(self._attName_defaultValue[attName])
                self._attName_id_value[attName][rowID] = value
    finally:
        dataFile.file.close()

    # Bind attribute names to their dictionaries.
    self.bindAttributes(attNames)

    # Bind the id attribute to the first attribute; they all share the
    # same ids.
    self.linkIDsToColumn()
def load(self, attNames, paraInfo=None):
    """Read the given attributes from the data file into the master dicts.

    Args:
        attNames: re-iterable sequence of attribute names to load.
        paraInfo: ``[runNumber, numberOfRuns]`` for a parallel run (both
            values are converted with ``int``), ``['splitRun', 'splitRun']``
            to only set ``self._splitRunFlag``, or ``None`` /
            ``[None, None]`` for a plain full load. The caller's list is
            never modified.

    Notes:
        * A field equal to '.', or a column position at or beyond the
          number of slots, yields a copy of the attribute's default value.
        * When ``self._packetInfo`` is set, reading starts at the position
          selected by ``seekToLineStart(self._packetInfo[0])`` and stops at
          the first row whose ID equals ``self._packetInfo[1]``.
        * The data file is closed even if a row fails to parse.
    """
    if paraInfo is None:
        paraInfo = [None, None]

    if paraInfo == ['splitRun', 'splitRun']:
        self._splitRunFlag = True
        paraInfo = [None, None]  # from here on, behave as a plain load

    # If running parallel or on a specific range, record the range info.
    if paraInfo != [None, None]:
        runNumber = int(paraInfo[0])
        numberOfRuns = int(paraInfo[1])
        self._packetInfo = cgFile.getPacketInfo(
            self._dataFileName, numberOfRuns)[runNumber - 1]

    self._selectedAttNames = attNames

    # Casting and column info, master dictionaries, number of slots.
    self.loadTranscriptionInfo(attNames)
    self.initializeMasterDict()
    numSlots = self.getNumberOfSlots()

    # Open the file and, for a packet, skip straight to its first line.
    dataFile = cgFile.cgFile(self._dataFileName)
    try:
        if self._packetInfo:
            dataFile.seekToLineStart(self._packetInfo[0])

        currentID = 0
        for line in dataFile.file:
            ls = line.strip().split('\t')

            # The ID is the first column when the file carries IDs,
            # otherwise rows are numbered in reading order.
            if self.hasIDs:
                rowID = int(ls[0])
            else:
                rowID = currentID
                currentID += 1

            # Stop at the end of the packet range.
            if self._packetInfo and rowID == self._packetInfo[1]:
                break

            # NOTE: lots of copying is slow (~10x per the original author).
            for attName in attNames:
                colPosition = self._attName_columnPosition[attName]
                if colPosition < numSlots and ls[colPosition] != '.':
                    value = self._attName_casteFromFxn[attName](
                        ls[colPosition])
                else:
                    value = copy(self._attName_defaultValue[attName])
                self._attName_id_value[attName][rowID] = value
    finally:
        dataFile.file.close()

    # Bind attribute names to their dictionaries.
    self.bindAttributes(attNames)

    # Bind the id attribute to the first attribute; they all share the
    # same ids.
    self.linkIDsToColumn()