def pdsHeaderParser(fileName):
    """Parse the header of a PDS file.

    Returns a tuple (fileDesc, needToSwap, streamNames, oDict):
      fileDesc    -- the still-open file object, positioned just past the header
      needToSwap  -- 1 if the file was written on an opposite-endian machine
      streamNames -- list of stream names found in the header
      oDict       -- {streamName: list of proxies in that stream}

    Raises ValueError when the trailing header-size word does not match the
    size recorded in the first three header words.
    NOTE: the caller owns fileDesc and is responsible for closing it.
    """
    svName, parents, verList, verDict = decodeVersionInfo(fileName)
    PDSSIGNATURE = 3141592  # magic number for PDS format, as it should be
    pdsSignature = 0        # signature we read from given PDS file
    fileDesc = open(fileName, 'rb')
    headerHeader = array.array('I')
    headerHeader.fromfile(fileDesc, 3)
    # to handle endianess, read pdsSignature (stored in the upper 24 bits)
    pdsSignature = headerHeader[0] >> 8
    pds_utils.pdsSignature = pdsSignature
    pds_utils.PDSSIGNATURE = PDSSIGNATURE
    needToSwap = 0
    if pdsSignature != PDSSIGNATURE:
        needToSwap = 1
        headerHeader.byteswap()
    streamNames = pds_utils.name_list_from_file(fileDesc)
    shProxyNames = pds_utils.name_list_from_file(fileDesc)
    shNames = pds_utils.name_list_from_file(fileDesc)
    # grab the rest of the header
    restOfHeader = array.array('I')
    # the header size ignores the first 3 words in the event
    restOfHeader.fromfile(fileDesc, headerHeader[2] - fileDesc.tell() / 4 + 3)
    if pdsSignature != PDSSIGNATURE:
        restOfHeader.byteswap()
    if restOfHeader[-1] != headerHeader[2]:
        # FIX: was `raise "header inconsistent"` -- string exceptions are
        # illegal in Python >= 2.6 and would themselves raise TypeError.
        raise ValueError("header inconsistent")
    proxiesInStreams = pds_utils.find_proxies_in_streams(
        restOfHeader, streamNames, shProxyNames)
    oDict = {}
    for idx in xrange(0, len(streamNames)):
        oDict[streamNames[idx]] = proxiesInStreams[idx]
    return (fileDesc, needToSwap, streamNames, oDict)
def pdsHeaderParser(fileName):
    """Parse the header of a PDS file.

    NOTE(review): this is a duplicate definition of pdsHeaderParser; at
    import time the later definition silently wins. Consider removing one.

    Returns (fileDesc, needToSwap, streamNames, oDict); fileDesc is left
    open and positioned just past the header, oDict maps stream name to the
    list of proxies in that stream. Raises ValueError on a corrupt header.
    """
    svName, parents, verList, verDict = decodeVersionInfo(fileName)
    PDSSIGNATURE = 3141592  # magic number for PDS format, as it should be
    pdsSignature = 0        # signature we read from given PDS file
    fileDesc = open(fileName, 'rb')
    headerHeader = array.array('I')
    headerHeader.fromfile(fileDesc, 3)
    # to handle endianess, read pdsSignature (upper 24 bits of word 0)
    pdsSignature = headerHeader[0] >> 8
    pds_utils.pdsSignature = pdsSignature
    pds_utils.PDSSIGNATURE = PDSSIGNATURE
    needToSwap = 0
    if pdsSignature != PDSSIGNATURE:
        needToSwap = 1
        headerHeader.byteswap()
    streamNames = pds_utils.name_list_from_file(fileDesc)
    shProxyNames = pds_utils.name_list_from_file(fileDesc)
    shNames = pds_utils.name_list_from_file(fileDesc)
    # grab the rest of the header
    restOfHeader = array.array('I')
    # the header size ignores the first 3 words in the event
    restOfHeader.fromfile(fileDesc, headerHeader[2] - fileDesc.tell() / 4 + 3)
    if pdsSignature != PDSSIGNATURE:
        restOfHeader.byteswap()
    if restOfHeader[-1] != headerHeader[2]:
        # FIX: string exceptions are illegal in Python >= 2.6
        raise ValueError("header inconsistent")
    proxiesInStreams = pds_utils.find_proxies_in_streams(
        restOfHeader, streamNames, shProxyNames)
    oDict = {}
    for idx in xrange(0, len(streamNames)):
        oDict[streamNames[idx]] = proxiesInStreams[idx]
    return (fileDesc, needToSwap, streamNames, oDict)
def getProxies(fileName):
    """Return list of proxies (data) stored in pds location file.

    Returns (streamNames, dataKeysInStreams). Raises ValueError when the
    trailing header-size word disagrees with the recorded header size.
    """
    # define signature of location file
    global PDSSIGNATURE
    pds_utils.PDSSIGNATURE = PDSSIGNATURE
    fileDesc = open(fileName, 'rb')
    try:
        headerHeader = array.array('I')
        headerHeader.fromfile(fileDesc, 3)
        global pdsSignature
        pdsSignature = headerHeader[0] >> 8
        pds_utils.pdsSignature = pdsSignature
        if pdsSignature != PDSSIGNATURE:
            headerHeader.byteswap()
        nFiles = array.array('I')
        nFiles.fromfile(fileDesc, 1)
        if pdsSignature != PDSSIGNATURE:
            nFiles.byteswap()
        # two 32-bit words per file identifier
        fileIDs = array.array('I')
        fileIDs.fromfile(fileDesc, 2 * nFiles[0])
        if pdsSignature != PDSSIGNATURE:
            fileIDs.byteswap()
        streamNames = pds_utils.name_list_from_file(fileDesc)
        # grab the hash (5 words); renamed from `hash` to avoid shadowing
        # the builtin
        hashWords = array.array('I')
        hashWords.fromfile(fileDesc, 5)
        # grab the rest of the header
        restOfHeader = array.array('I')
        # the header size ignores the first 3 words in the event
        restOfHeader.fromfile(fileDesc,
                              headerHeader[2] - fileDesc.tell() / 4 + 3)
        if pdsSignature != PDSSIGNATURE:
            restOfHeader.byteswap()
        if restOfHeader[-1] != headerHeader[2]:
            # FIX: was an illegal string exception
            raise ValueError("header inconsistent")
        dataKeysInStreams = find_datakeys_in_streams(restOfHeader[:-1],
                                                     streamNames)
    finally:
        # FIX: the descriptor was leaked in the original (never closed,
        # not returned to the caller)
        fileDesc.close()
    return (streamNames, dataKeysInStreams)
def getProxies(fileName):
    """Return list of proxies (data) stored in pds location file.

    NOTE(review): duplicate definition of getProxies; the later definition
    wins at import time. Consider removing one copy.

    Returns (streamNames, dataKeysInStreams); raises ValueError on a
    corrupt header.
    """
    # define signature of location file
    global PDSSIGNATURE
    pds_utils.PDSSIGNATURE = PDSSIGNATURE
    fileDesc = open(fileName, 'rb')
    try:
        headerHeader = array.array('I')
        headerHeader.fromfile(fileDesc, 3)
        global pdsSignature
        pdsSignature = headerHeader[0] >> 8
        pds_utils.pdsSignature = pdsSignature
        if pdsSignature != PDSSIGNATURE:
            headerHeader.byteswap()
        nFiles = array.array('I')
        nFiles.fromfile(fileDesc, 1)
        if pdsSignature != PDSSIGNATURE:
            nFiles.byteswap()
        # two words per file identifier
        fileIDs = array.array('I')
        fileIDs.fromfile(fileDesc, 2 * nFiles[0])
        if pdsSignature != PDSSIGNATURE:
            fileIDs.byteswap()
        streamNames = pds_utils.name_list_from_file(fileDesc)
        # grab the hash (5 words); renamed to avoid shadowing builtin hash
        hashWords = array.array('I')
        hashWords.fromfile(fileDesc, 5)
        # grab the rest of the header
        restOfHeader = array.array('I')
        # the header size ignores the first 3 words in the event
        restOfHeader.fromfile(fileDesc,
                              headerHeader[2] - fileDesc.tell() / 4 + 3)
        if pdsSignature != PDSSIGNATURE:
            restOfHeader.byteswap()
        if restOfHeader[-1] != headerHeader[2]:
            # FIX: was an illegal string exception
            raise ValueError("header inconsistent")
        dataKeysInStreams = find_datakeys_in_streams(restOfHeader[:-1],
                                                     streamNames)
    finally:
        # FIX: descriptor leak in the original
        fileDesc.close()
    return (streamNames, dataKeysInStreams)
def locationFileParser(locFileName):
    """Parse header of location file and read data types.

    Returns a list:
      [streamNames, pdsIDList, oDict, hashWords, dataKeysInStreams,
       recordSize, posOfFirstRecord, needToSwap]
    where oDict maps stream name -> data keys, recordSize is in 32-bit
    words and posOfFirstRecord is the byte offset of the first record.
    Exits the process (sys.exit(1)) on an inconsistent header.
    """
    # define signature of location file
    global PDSSIGNATURE
    pds_utils.PDSSIGNATURE = PDSSIGNATURE
    locFile = open(locFileName, 'rb')
    headerHeader = array.array('I')
    headerHeader.fromfile(locFile, 4)
    # to handle endianess, read pdsSignature
    global pdsSignature
    pdsSignature = headerHeader[0] >> 8
    pds_utils.pdsSignature = pdsSignature
    needToSwap = 0
    if pdsSignature != PDSSIGNATURE:
        needToSwap = 1
        headerHeader.byteswap()
    # read file identifyer list (two words per file)
    nFiles = headerHeader[3]
    fileIDs = array.array('I')
    fileIDs.fromfile(locFile, 2 * nFiles)
    if pdsSignature != PDSSIGNATURE:
        fileIDs.byteswap()
    # read stream name list
    streamNames = pds_utils.name_list_from_file(locFile)
    # read hash (5 words); renamed from `hash` to avoid shadowing builtin
    hashWords = array.array('I')
    hashWords.fromfile(locFile, 5)
    # grab the rest of the header
    restOfHeader = array.array('I')
    # the header size ignores the first 3 words in the event
    restOfHeader.fromfile(locFile, headerHeader[2] - locFile.tell() / 4 + 3)
    if pdsSignature != PDSSIGNATURE:
        restOfHeader.byteswap()
    if restOfHeader[-1] != headerHeader[2]:
        # FIX: original called self.printError() inside this module-level
        # function -- a guaranteed NameError on the error path. Report the
        # problem and exit as originally intended.
        sys.stderr.write("Didn't parse correctly location file\n")
        sys.exit(1)
    dataKeysInStreams = find_datakeys_in_streams(restOfHeader[:-1],
                                                 streamNames)
    # form dictionary of pairs {(stream,[datakeys])}
    oDict = {}
    maxDataKeys = 0
    for idx in xrange(0, len(streamNames)):
        oDict[streamNames[idx]] = dataKeysInStreams[idx]
        if len(dataKeysInStreams[idx]) > maxDataKeys:
            maxDataKeys = len(dataKeysInStreams[idx])
    # pad so that (count + 4) is a multiple of 8
    nCharsForDataKeysInRecord = maxDataKeys
    while ((nCharsForDataKeysInRecord + 4) % 8):
        nCharsForDataKeysInRecord += 1
    recordSize = len(fileIDs) + 1 + nCharsForDataKeysInRecord  # in 32-bit words
    # create pds file Ids: combine (high, low) word pairs into 64-bit ids
    pdsIDList = []
    for idx in xrange(0, len(fileIDs), 2):
        pdsID = (fileIDs[idx + 1] << 32) | fileIDs[idx]
        pdsIDList.append(int(pdsID))
    posOfFirstRecord = locFile.tell()
    locFile.close()
    return [streamNames, pdsIDList, oDict, hashWords, dataKeysInStreams,
            recordSize, posOfFirstRecord, needToSwap]
def buildLocationHeader(iPDSFileName, iFileID):
    """Build a PDS location header, from given pds file name and file id"""
    # Reads the header of the PDS file at iPDSFileName and assembles the
    # word array for a location-file header referencing it. Returns a
    # 12-tuple; note the returned pdsFile is left OPEN, positioned just
    # past the PDS header (caller must close it).
    # print "iPDSFileName",iPDSFileName
    pdsFile = open(iPDSFileName, 'rb')
    pdsFileID = int(iFileID)
    pdsHeader = array.array('I')
    pdsHeader.fromfile(pdsFile, 3)
    global pdsSignature
    # signature lives in the upper 24 bits of the first header word
    pdsSignature = pdsHeader[0] >> 8
    pds_utils.PDSSIGNATURE = PDSSIGNATURE
    pds_utils.pdsSignature = pdsSignature
    if pdsSignature != PDSSIGNATURE:
        # file written on an opposite-endian machine
        pdsHeader.byteswap()
        needToSwap = 1
    else:
        needToSwap = 0
    locationHeader = array.array('I')
    locationHeader.fromlist([2951413 * 256])  #file id for master index
    locationHeader.append(0)  #remaining number of words in of header
    locationHeader.append(0)
    #associated file list
    fileList = array.array('I')
    # for now only set the low word for the file ID
    fileList.fromlist([1, pdsFileID, 0])
    locationHeader = locationHeader + fileList
    # now get the names of the streams
    # the location file and the PDS file use the same format for the
    # stream names so just have to copy the info
    numberOfRecordNameWords = array.array('I')
    numberOfRecordNameWords.fromfile(pdsFile, 1)
    if pdsSignature != PDSSIGNATURE:
        numberOfRecordNameWords.byteswap()
    #need number of names
    # rewind to word 4 of the PDS file, where the packed name chars start
    pdsFile.seek(4 * 4)
    nameChars = array.array('c')
    nameChars.fromfile(pdsFile, numberOfRecordNameWords[0] * 4)
    if pdsSignature != PDSSIGNATURE:
        nameChars.byteswap()
    streamNames = pds_utils.name_list(nameChars)
    sortedStreamNames = list(streamNames)
    sortedStreamNames.sort()
    #build conversion from new stream index to old index
    # (the location file stores streams sorted by name; the PDS file may not)
    newStreamIndex2OldIndex = []
    oldStreamIndex2NewIndex = [0] * len(streamNames)
    streamNameString = ""
    for name in sortedStreamNames:
        newStreamIndex2OldIndex.append(streamNames.index(name))
        oldStreamIndex2NewIndex[
            newStreamIndex2OldIndex[-1]] = len(newStreamIndex2OldIndex) - 1
        streamNameString = streamNameString + name + "\0"
    # drop the trailing NUL, then pad to a 4-byte (word) boundary
    streamNameString = streamNameString[:-1]
    while 0 != len(streamNameString) % 4:
        streamNameString = streamNameString + "\0"
    nameWords = array.array('I')
    nameWords.fromstring(streamNameString)
    locationHeader = locationHeader + numberOfRecordNameWords + nameWords
    #print streamNames
    shProxyNames = pds_utils.name_list_from_file(pdsFile)
    #print shProxyNames
    #print len(shProxyNames)
    shNames = pds_utils.name_list_from_file(pdsFile)
    #print shNames
    #grab the rest of the header
    restOfHeader = array.array('I')
    #the header size ignores the first 3 words in the event
    restOfHeader.fromfile(pdsFile, pdsHeader[2] - pdsFile.tell() / 4 + 3)
    if pdsSignature != PDSSIGNATURE:
        restOfHeader.byteswap()
    #print restOfHeader
    # last header word repeats the header size -- consistency check
    # NOTE(review): string exceptions are illegal in Python >= 2.6; this
    # line raises TypeError there rather than the intended error.
    if restOfHeader[-1] != pdsHeader[2]:
        raise "header inconsistent"
    #create the list of 'type' 'usage' 'production' tags for each stream
    def proxySort(a, b):
        # compare 3-tuples field by field (old-style cmp comparator)
        #print a, b
        temp = cmp(a[0], b[0])
        if 0 == temp:
            temp = cmp(a[1], b[1])
            if 0 == temp:
                temp = cmp(a[2], b[2])
        return temp
    proxiesInStreams = pds_utils.find_proxies_in_streams(
        restOfHeader, streamNames, shProxyNames)
    #sortProxiesInStreams = proxiesInStreams
    for proxies in proxiesInStreams:
        proxies.sort(proxySort)
    #reserve space for our hash (5 words = 20-byte SHA-1 digest,
    # filled in below once the hashed region is complete)
    dataKeyHashIndex = len(locationHeader)
    dataKeyHashArray = array.array('I')
    dataKeyHashArray.fromlist([0] * 5)
    locationHeader += dataKeyHashArray
    maxNProxies = 0
    nProxies = array.array('I')
    nProxies.fromlist([0])
    proxiesArray = []
    for oldIndex in newStreamIndex2OldIndex:
        #print oldIndex
        proxies = proxiesInStreams[oldIndex]
        nProxies[0] = len(proxies)
        if nProxies[0] > maxNProxies:
            maxNProxies = nProxies[0]
        locationHeader += nProxies
        datakeys = ""
        #now add each string
        proxyList = []
        for proxy in proxies:
            proxyList.append(proxy)
            for key in proxy:
                datakeys += key + "\0"
        # pad the packed key strings to a word boundary
        while (len(datakeys) % 4):
            datakeys += "\0"
        #print len(datakeys)
        #print datakeys
        #print len(datakeys)
        dataKeysArray = array.array('I')
        dataKeysArray.fromstring(datakeys)
        # proxiesArray+=[dataKeysArray.tolist()]
        proxiesArray += [proxyList]
        #nProxies[0] = len(dataKeysArray)
        #locationHeader += nProxies
        locationHeader += dataKeysArray
    #calculate the hash
    # NOTE(review): hashed region starts at dataKeyHashIndex+1 but the
    # digest is spliced in at dataKeyHashIndex -- presumably intentional,
    # verify against the reader side.
    hash = sha.new(locationHeader[dataKeyHashIndex + 1:].tostring()).digest()
    #print sha.new( locationHeader[dataKeyHashIndex+1:].tostring() ).hexdigest()
    dataKeyHashArray = array.array('I')
    dataKeyHashArray.fromstring(hash)
    locationHeader[dataKeyHashIndex:dataKeyHashIndex + 5] = dataKeyHashArray
    locationHeaderBeforePadding = locationHeader.tolist()
    #pad header so Records begin on 8 byte boundary
    if not (len(locationHeader) % 2):
        locationHeader.fromlist([0])
    # header length excludes the first 3 words, includes the trailing size word
    headerLength = len(locationHeader) + 1 - 3
    locationHeader.fromlist([headerLength])
    locationHeader[2] = headerLength
    #pad the max number of ASUs to be a multiple of 8
    nASUperRecord = maxNProxies
    while ((nASUperRecord + 4) % 8):
        nASUperRecord += 1
    #For each stream, create the ASU to file ID list
    # (0 = first file, 255 = unused slot)
    whichFileForStream = []
    whichFileForStreamFake = []
    for proxies in proxiesInStreams:
        whichFile = array.array('B')
        fakeArray = array.array('B')
        whichFile.fromlist([0] * len(proxies) +
                           [255] * (nASUperRecord - len(proxies)))
        for x in xrange(0, len(whichFile)):
            fakeArray.append(255)
        #print whichFile
        whichFileForStream.append(whichFile)
        whichFileForStreamFake.append(fakeArray)
    return (pdsFile, locationHeader, locationHeaderBeforePadding, nameWords,
            dataKeyHashArray, proxiesArray, streamNames,
            oldStreamIndex2NewIndex, newStreamIndex2OldIndex,
            whichFileForStream, whichFileForStreamFake, needToSwap)
def getStreamDataKeyDictFromPDS(iPDSFileName):
    """Extract from PDS file streamDataKey dictionary.

    Returns {streamName: sorted list of proxy key tuples}; each stream's
    proxies are ordered by their first, second, then third field.
    Raises ValueError on an inconsistent header.
    """
    pdsFile = open(iPDSFileName, 'rb')
    pdsHeader = array.array('I')
    pdsHeader.fromfile(pdsFile, 3)
    global pdsSignature
    # signature is in the upper 24 bits of the first word
    pdsSignature = pdsHeader[0] >> 8
    pds_utils.PDSSIGNATURE = PDSSIGNATURE
    pds_utils.pdsSignature = pdsSignature
    if pdsSignature != PDSSIGNATURE:
        pdsHeader.byteswap()
    # now get the names of the streams
    # the location file and the PDS file use the same format for the
    # stream names so just have to copy the info
    numberOfRecordNameWords = array.array('I')
    numberOfRecordNameWords.fromfile(pdsFile, 1)
    if pdsSignature != PDSSIGNATURE:
        numberOfRecordNameWords.byteswap()
    # rewind to word 4, where the packed name characters begin
    pdsFile.seek(4 * 4)
    nameChars = array.array('c')
    nameChars.fromfile(pdsFile, numberOfRecordNameWords[0] * 4)
    if pdsSignature != PDSSIGNATURE:
        nameChars.byteswap()
    streamNames = pds_utils.name_list(nameChars)
    sortedStreamNames = list(streamNames)
    sortedStreamNames.sort()
    # build conversion from new (sorted) stream index to old index
    newStreamIndex2OldIndex = []
    for name in sortedStreamNames:
        newStreamIndex2OldIndex.append(streamNames.index(name))
    shProxyNames = pds_utils.name_list_from_file(pdsFile)
    # shNames must be read even though unused: it advances the file position
    shNames = pds_utils.name_list_from_file(pdsFile)
    # grab the rest of the header
    restOfHeader = array.array('I')
    # the header size ignores the first 3 words in the event
    restOfHeader.fromfile(pdsFile, pdsHeader[2] - pdsFile.tell() / 4 + 3)
    if pdsSignature != PDSSIGNATURE:
        restOfHeader.byteswap()
    # FIX: descriptor was leaked in the original (never closed, not returned)
    pdsFile.close()
    if restOfHeader[-1] != pdsHeader[2]:
        # FIX: was an illegal string exception
        raise ValueError("header inconsistent")

    # create the list of 'type' 'usage' 'production' tags for each stream
    def proxySort(a, b):
        # old-style cmp comparator: field-by-field tuple comparison
        temp = cmp(a[0], b[0])
        if 0 == temp:
            temp = cmp(a[1], b[1])
            if 0 == temp:
                temp = cmp(a[2], b[2])
        return temp

    proxiesInStreams = pds_utils.find_proxies_in_streams(
        restOfHeader, streamNames, shProxyNames)
    for proxies in proxiesInStreams:
        proxies.sort(proxySort)
    streamDataKeyDict = {}
    for oldIndex in newStreamIndex2OldIndex:
        streamDataKeyDict[streamNames[oldIndex]] = proxiesInStreams[oldIndex]
    return streamDataKeyDict
def pdsParser(file,what=""): """PDS file parser. Return a list of run/uid/sync.values/proxies in given file""" fileDesc = open(file,'rb') headerHeader = array.array('I') headerHeader.fromfile(fileDesc, 3) # global pdsSignature pdsSignature=headerHeader[0]>>8 pds_utils.PDSSIGNATURE=PDSSIGNATURE pds_utils.pdsSignature=pdsSignature if pdsSignature != PDSSIGNATURE: headerHeader.byteswap() # invoke name_list_from_file with additional argument 1 which means # return number of words for those names streamNames,streamNamesWords = pds_utils.name_list_from_file(fileDesc,1) shProxyNames,shProxyNamesWords = pds_utils.name_list_from_file(fileDesc,1) shNames,shNamesWords = pds_utils.name_list_from_file(fileDesc,1) #grab the rest of the header restOfHeader = array.array('I') #the header size ignores the first 3 words in the event restOfHeader.fromfile(fileDesc, headerHeader[2] -fileDesc.tell()/4 +3) if pdsSignature != PDSSIGNATURE: restOfHeader.byteswap() if restOfHeader[-1] != headerHeader[2]: raise "header inconsistent" proxiesInStreams = pds_utils.find_proxies_in_streams( restOfHeader, streamNames, shProxyNames) # define proxy dictionary dict={'stream':listOfProxiesInThatStream} pList = [] for str in streamNames: strIndex = streamNames.index(str) pList.append((str,proxiesInStreams[strIndex])) posOfLastHeaderWords = fileDesc.tell() #start to read the rest of the file runList = [] uidList = [] syncList= [] while 1: try: endReason = "" recordHeader = array.array('I') recordHeader.fromfile(fileDesc,5) if pdsSignature != PDSSIGNATURE: recordHeader.byteswap() # read stream id and convert it to stream name streamId = int(recordHeader[0]) stream = streamNames[streamId] # read uid uid = ((recordHeader[4]<<32)|recordHeader[3]) # form syncValue syncValue = (recordHeader[1],recordHeader[2],uid,stream) syncList.append(syncValue) # read run record runNumber = recordHeader[1] if len(runList)==0 or (uid!=uidList[-1] or runNumber!=runList[-1]): runList.append(runNumber) uidList.append(uid) # 
read the record data recordDataSize = array.array('I') recordDataSize.fromfile(fileDesc,1) if pdsSignature != PDSSIGNATURE: recordDataSize.byteswap() # print "recordDataSize",recordDataSize recordData = array.array('I') recordData.fromfile(fileDesc,recordDataSize[0]-1) if pdsSignature != PDSSIGNATURE: recordData.byteswap() recordDataEnd = array.array('I') recordDataEnd.fromfile(fileDesc,1) if pdsSignature != PDSSIGNATURE: recordDataEnd.byteswap() if recordDataSize[0] != recordDataEnd[0]: print "Error of reading record data" sys.exit(1) except EOFError: break # runList.sort() # uidList.sort() # syncList.sort() pList.sort() if what=="run": return runList elif what=="uid": return uidList elif what=="syncValue": return syncList elif what=="proxies": return pList returnList = [runList,uidList,pList,syncList] returnList += [streamNames,streamNamesWords] returnList += [shProxyNames,shProxyNamesWords] returnList += [shNames,shNamesWords] returnList += [proxiesInStreams,posOfLastHeaderWords] return returnList
def dump(fileName, verbose=0, iStream='event'):
    """Dump content of pds file to stdout"""
    # Prints version info, stream/type lists, then walks every record,
    # accumulating per-proxy size statistics for the stream iStream
    # (total, min, max). verbose=1 adds per-record detail and a summary.
    svName, parents, verList, verDict = decodeVersionInfo(fileName)
    if not parents:
        parents = 'N/A'
    if svName:
        print "Versioning information:"
        print "-----------------------"
        print "data version name: '%s'" % svName
        print "list of parents :", parents
    PDSSIGNATURE = 3141592  # magic number for PDS format, as it should be
    pdsSignature = 0  # signature we read from given PDS file
    fileDesc = open(fileName, 'rb')
    headerHeader = array.array('I')
    headerHeader.fromfile(fileDesc, 3)
    # to handle endianess, read pdsSignature (upper 24 bits of word 0)
    pdsSignature = headerHeader[0] >> 8
    pds_utils.pdsSignature = pdsSignature
    pds_utils.PDSSIGNATURE = PDSSIGNATURE
    if pdsSignature != PDSSIGNATURE:
        print "File was produced on another endian machine, byte swapping is enabled"
    if pdsSignature != PDSSIGNATURE:
        headerHeader.byteswap()
    print
    print "List of streams in a file:",
    streamNames = pds_utils.name_list_from_file(fileDesc)
    for x in streamNames:
        print x,
    print
    shProxyNames = pds_utils.name_list_from_file(fileDesc)
    shNames = pds_utils.name_list_from_file(fileDesc)
    if verbose:
        print "List of types that storage helper proxy factories have:"
        for x in shProxyNames:
            print x,
        print "\n\n"
        print "List of types that only have storage helpers"
        for x in shNames:
            print x,
        print
    # grab the rest of the header
    restOfHeader = array.array('I')
    # the header size ignores the first 3 words in the event
    restOfHeader.fromfile(fileDesc, headerHeader[2] - fileDesc.tell() / 4 + 3)
    if pdsSignature != PDSSIGNATURE:
        restOfHeader.byteswap()
    # NOTE(review): string exceptions are illegal in Python >= 2.6; this
    # raises TypeError there instead of the intended error.
    if restOfHeader[-1] != headerHeader[2]:
        raise "header inconsistent"
    proxiesInStreams = pds_utils.find_proxies_in_streams(
        restOfHeader, streamNames, shProxyNames)
    print
    for idx in xrange(0, len(streamNames)):
        print "List of data keys in stream:", streamNames[idx]
        print
        pds_utils.printProxies(proxiesInStreams[idx])
    # want to only look at stuff in given stream (default is event stream)
    if not streamNames.count(iStream):
        print "There is no stream %s in %s" % (iStream, fileName)
        return
    eventIndex = streamNames.index(iStream)
    # create a structure to hold our info:
    # per proxy [total size, min size, max size] (min seeded very large)
    eventProxies = proxiesInStreams[eventIndex]
    accumulatedData = []
    for proxy in eventProxies:
        accumulatedData.append([0, 488888888, 0])
    # start to read the rest of the file
    numEvents = 0
    firstSV = ""
    lastSV = ""
    while 1:
        try:
            endReason = ""
            pos = fileDesc.tell()
            recordHeader = array.array('I')
            recordHeader.fromfile(fileDesc, 5)
            if pdsSignature != PDSSIGNATURE:
                recordHeader.byteswap()
            # uid is a 64-bit value stored as two words
            uid = ((recordHeader[4] << 32) | recordHeader[3])
            if not len(firstSV):
                firstSV = "%d/%d/%d" % (recordHeader[1], recordHeader[2], uid)
            lastSV = "%d/%d/%d" % (recordHeader[1], recordHeader[2], uid)
            if verbose:
                print "********************"
                print "position : %s" % pos
                print "stream : " + streamNames[recordHeader[0]]
                print "SyncValue : %s/%s/%s" % (recordHeader[1],
                                                recordHeader[2], uid)
            # endReason tracks how far we got, for the EOF diagnostic below
            endReason = "bad Record: record size"
            recordDataLength = array.array('I')
            recordDataLength.fromfile(fileDesc, 1)
            if pdsSignature != PDSSIGNATURE:
                recordDataLength.byteswap()
            if verbose:
                print "size of record data: " + str(recordDataLength[0])
            endReason = "bad Record: record (type " + streamNames[
                recordHeader[0]] + ") (size " + str(recordDataLength[0]) + ")"
            begRecPos = fileDesc.tell()
            recordData = array.array('I')
            recordData.fromfile(fileDesc, recordDataLength[0])
            if pdsSignature != PDSSIGNATURE:
                recordData.byteswap()
            numEvents = numEvents + 1
            # walk the proxy entries inside the record:
            # [proxyIndex, dataSize, <dataSize words of payload>] ...
            index = 0
            endIndex = len(recordData)
            dataInfo = []
            dataInfoSize = []
            lengthOffset = 0
            while index < endIndex - 1:
                proxyIndex = int(recordData[index])
                index = index + 1
                dataSize = recordData[index]
                index = index + int(dataSize) + 1
                fullDataSize = dataSize + 2
                strData = str(proxiesInStreams[recordHeader[0]][proxyIndex])
                if lengthOffset < len(strData):
                    lengthOffset = len(strData)
                dataInfo.append(strData)
                dataInfoSize.append(fullDataSize)
                # if verbose:
                #     print str(proxiesInStreams[recordHeader[0]][proxyIndex]) + " size "+str(fullDataSize)
                if recordHeader[0] == eventIndex:
                    # update total/min/max for this proxy
                    accumulatedData[proxyIndex][
                        0] = accumulatedData[proxyIndex][0] + fullDataSize
                    if accumulatedData[proxyIndex][1] > fullDataSize:
                        accumulatedData[proxyIndex][1] = fullDataSize
                    if accumulatedData[proxyIndex][2] < fullDataSize:
                        accumulatedData[proxyIndex][2] = fullDataSize
            if verbose:
                print
                for idx in xrange(0, len(dataInfo)):
                    s = dataInfo[idx]
                    print "%s size %s" % (s.ljust(lengthOffset),
                                          dataInfoSize[idx])
        except EOFError:
            # normal termination; endReason explains a truncated record
            if verbose and endReason:
                print "########################################################"
                print endReason
                print "########################################################"
            break
    index = 0
    # NOTE(review): string exception again -- illegal in Python >= 2.6
    if numEvents == 0:
        raise "no events found"
    if verbose:
        print "######### Summary ##########"
        print "Proxies in %s:" % iStream
        for item in eventProxies:
            # per-proxy: min size, mean size, max size
            print "%s : %i %f %i" % (item, accumulatedData[index][1],
                                     float(accumulatedData[index][0]) /
                                     float(numEvents),
                                     accumulatedData[index][2])
            index = index + 1
    print "First sync value :", firstSV
    print "Last sync value :", lastSV
    print "Number of syncValues :", numEvents
    print
def getStreamDataKeyDictFromPDS(iPDSFileName):
    """Extract from PDS file streamDataKey dictionary.

    NOTE(review): duplicate definition of getStreamDataKeyDictFromPDS;
    the later definition wins at import time. Consider removing one copy.

    Returns {streamName: sorted list of proxy key tuples}; raises
    ValueError on an inconsistent header.
    """
    pdsFile = open(iPDSFileName, 'rb')
    pdsHeader = array.array('I')
    pdsHeader.fromfile(pdsFile, 3)
    global pdsSignature
    pdsSignature = pdsHeader[0] >> 8
    pds_utils.PDSSIGNATURE = PDSSIGNATURE
    pds_utils.pdsSignature = pdsSignature
    if pdsSignature != PDSSIGNATURE:
        pdsHeader.byteswap()
    # now get the names of the streams
    # the location file and the PDS file use the same format for the
    # stream names so just have to copy the info
    numberOfRecordNameWords = array.array('I')
    numberOfRecordNameWords.fromfile(pdsFile, 1)
    if pdsSignature != PDSSIGNATURE:
        numberOfRecordNameWords.byteswap()
    # rewind to word 4, where the packed name characters begin
    pdsFile.seek(4 * 4)
    nameChars = array.array('c')
    nameChars.fromfile(pdsFile, numberOfRecordNameWords[0] * 4)
    if pdsSignature != PDSSIGNATURE:
        nameChars.byteswap()
    streamNames = pds_utils.name_list(nameChars)
    sortedStreamNames = list(streamNames)
    sortedStreamNames.sort()
    # build conversion from new (sorted) stream index to old index
    newStreamIndex2OldIndex = []
    for name in sortedStreamNames:
        newStreamIndex2OldIndex.append(streamNames.index(name))
    shProxyNames = pds_utils.name_list_from_file(pdsFile)
    # shNames is unused but the read advances the file position
    shNames = pds_utils.name_list_from_file(pdsFile)
    # grab the rest of the header
    restOfHeader = array.array('I')
    # the header size ignores the first 3 words in the event
    restOfHeader.fromfile(pdsFile, pdsHeader[2] - pdsFile.tell() / 4 + 3)
    if pdsSignature != PDSSIGNATURE:
        restOfHeader.byteswap()
    # FIX: descriptor leak in the original
    pdsFile.close()
    if restOfHeader[-1] != pdsHeader[2]:
        # FIX: was an illegal string exception
        raise ValueError("header inconsistent")

    # create the list of 'type' 'usage' 'production' tags for each stream
    def proxySort(a, b):
        # old-style cmp comparator: field-by-field tuple comparison
        temp = cmp(a[0], b[0])
        if 0 == temp:
            temp = cmp(a[1], b[1])
            if 0 == temp:
                temp = cmp(a[2], b[2])
        return temp

    proxiesInStreams = pds_utils.find_proxies_in_streams(
        restOfHeader, streamNames, shProxyNames)
    for proxies in proxiesInStreams:
        proxies.sort(proxySort)
    streamDataKeyDict = {}
    for oldIndex in newStreamIndex2OldIndex:
        streamDataKeyDict[streamNames[oldIndex]] = proxiesInStreams[oldIndex]
    return streamDataKeyDict
def dump(fileName, verbose=0, iStream='event'):
    """Dump content of pds file to stdout"""
    # NOTE(review): duplicate definition of dump(); the later definition
    # wins at import time. Prints version info, stream/type lists, then
    # walks every record accumulating per-proxy size stats for iStream.
    svName, parents, verList, verDict = decodeVersionInfo(fileName)
    if not parents:
        parents = 'N/A'
    if svName:
        print "Versioning information:"
        print "-----------------------"
        print "data version name: '%s'" % svName
        print "list of parents :", parents
    PDSSIGNATURE = 3141592  # magic number for PDS format, as it should be
    pdsSignature = 0  # signature we read from given PDS file
    fileDesc = open(fileName, 'rb')
    headerHeader = array.array('I')
    headerHeader.fromfile(fileDesc, 3)
    # to handle endianess, read pdsSignature (upper 24 bits of word 0)
    pdsSignature = headerHeader[0] >> 8
    pds_utils.pdsSignature = pdsSignature
    pds_utils.PDSSIGNATURE = PDSSIGNATURE
    if pdsSignature != PDSSIGNATURE:
        print "File was produced on another endian machine, byte swapping is enabled"
    if pdsSignature != PDSSIGNATURE:
        headerHeader.byteswap()
    print
    print "List of streams in a file:",
    streamNames = pds_utils.name_list_from_file(fileDesc)
    for x in streamNames:
        print x,
    print
    shProxyNames = pds_utils.name_list_from_file(fileDesc)
    shNames = pds_utils.name_list_from_file(fileDesc)
    if verbose:
        print "List of types that storage helper proxy factories have:"
        for x in shProxyNames:
            print x,
        print "\n\n"
        print "List of types that only have storage helpers"
        for x in shNames:
            print x,
        print
    # grab the rest of the header
    restOfHeader = array.array('I')
    # the header size ignores the first 3 words in the event
    restOfHeader.fromfile(fileDesc, headerHeader[2] - fileDesc.tell() / 4 + 3)
    if pdsSignature != PDSSIGNATURE:
        restOfHeader.byteswap()
    # NOTE(review): string exceptions are illegal in Python >= 2.6
    if restOfHeader[-1] != headerHeader[2]:
        raise "header inconsistent"
    proxiesInStreams = pds_utils.find_proxies_in_streams(
        restOfHeader, streamNames, shProxyNames)
    print
    for idx in xrange(0, len(streamNames)):
        print "List of data keys in stream:", streamNames[idx]
        print
        pds_utils.printProxies(proxiesInStreams[idx])
    # want to only look at stuff in given stream (default is event stream)
    if not streamNames.count(iStream):
        print "There is no stream %s in %s" % (iStream, fileName)
        return
    eventIndex = streamNames.index(iStream)
    # create a structure to hold our info:
    # per proxy [total size, min size, max size] (min seeded very large)
    eventProxies = proxiesInStreams[eventIndex]
    accumulatedData = []
    for proxy in eventProxies:
        accumulatedData.append([0, 488888888, 0])
    # start to read the rest of the file
    numEvents = 0
    firstSV = ""
    lastSV = ""
    while 1:
        try:
            endReason = ""
            pos = fileDesc.tell()
            recordHeader = array.array('I')
            recordHeader.fromfile(fileDesc, 5)
            if pdsSignature != PDSSIGNATURE:
                recordHeader.byteswap()
            # uid is a 64-bit value stored as two words
            uid = ((recordHeader[4] << 32) | recordHeader[3])
            if not len(firstSV):
                firstSV = "%d/%d/%d" % (recordHeader[1], recordHeader[2], uid)
            lastSV = "%d/%d/%d" % (recordHeader[1], recordHeader[2], uid)
            if verbose:
                print "********************"
                print "position : %s" % pos
                print "stream : " + streamNames[recordHeader[0]]
                print "SyncValue : %s/%s/%s" % (recordHeader[1],
                                                recordHeader[2], uid)
            # endReason tracks how far we got, for the EOF diagnostic below
            endReason = "bad Record: record size"
            recordDataLength = array.array('I')
            recordDataLength.fromfile(fileDesc, 1)
            if pdsSignature != PDSSIGNATURE:
                recordDataLength.byteswap()
            if verbose:
                print "size of record data: " + str(recordDataLength[0])
            endReason = "bad Record: record (type " + streamNames[
                recordHeader[0]] + ") (size " + str(recordDataLength[0]) + ")"
            begRecPos = fileDesc.tell()
            recordData = array.array('I')
            recordData.fromfile(fileDesc, recordDataLength[0])
            if pdsSignature != PDSSIGNATURE:
                recordData.byteswap()
            numEvents = numEvents + 1
            # walk the proxy entries inside the record:
            # [proxyIndex, dataSize, <dataSize words of payload>] ...
            index = 0
            endIndex = len(recordData)
            dataInfo = []
            dataInfoSize = []
            lengthOffset = 0
            while index < endIndex - 1:
                proxyIndex = int(recordData[index])
                index = index + 1
                dataSize = recordData[index]
                index = index + int(dataSize) + 1
                fullDataSize = dataSize + 2
                strData = str(proxiesInStreams[recordHeader[0]][proxyIndex])
                if lengthOffset < len(strData):
                    lengthOffset = len(strData)
                dataInfo.append(strData)
                dataInfoSize.append(fullDataSize)
                # if verbose:
                #     print str(proxiesInStreams[recordHeader[0]][proxyIndex]) + " size "+str(fullDataSize)
                if recordHeader[0] == eventIndex:
                    # update total/min/max for this proxy
                    accumulatedData[proxyIndex][0] = accumulatedData[
                        proxyIndex][0] + fullDataSize
                    if accumulatedData[proxyIndex][1] > fullDataSize:
                        accumulatedData[proxyIndex][1] = fullDataSize
                    if accumulatedData[proxyIndex][2] < fullDataSize:
                        accumulatedData[proxyIndex][2] = fullDataSize
            if verbose:
                print
                for idx in xrange(0, len(dataInfo)):
                    s = dataInfo[idx]
                    print "%s size %s" % (s.ljust(lengthOffset),
                                          dataInfoSize[idx])
        except EOFError:
            # normal termination; endReason explains a truncated record
            if verbose and endReason:
                print "########################################################"
                print endReason
                print "########################################################"
            break
    index = 0
    # NOTE(review): string exception again -- illegal in Python >= 2.6
    if numEvents == 0:
        raise "no events found"
    if verbose:
        print "######### Summary ##########"
        print "Proxies in %s:" % iStream
        for item in eventProxies:
            # per-proxy: min size, mean size, max size
            print "%s : %i %f %i" % (item, accumulatedData[index][1],
                                     float(accumulatedData[index][0]) /
                                     float(numEvents),
                                     accumulatedData[index][2])
            index = index + 1
    print "First sync value :", firstSV
    print "Last sync value :", lastSV
    print "Number of syncValues :", numEvents
    print
def decodeVersionInfo(fileName):
    """Decode VersionInfo from the beginrun record of a PDS file.

    VersionInfo consists of:
    - softwareRelease       : string
    - specificVersionName   : string
    - configurationHash     : string
    - ordinal               : unsigned int no packing
    - ancestors             : container of string

    It always grows, e.g. a post-p2 file will contain two VersionInfo's,
    one for itself and one for its parent.  The underlying algorithm
    creates a list of VersionInfo's in the following format:
    [(childTag,[softRel,svName,hash,id,parent1,parent2,...]),(parentTag,[...])]

    Returns a 4-tuple (svName, parents, versionInfoList, versionInfoDict);
    a tuple of four empty strings is returned when the file has no
    'beginrun' stream or no VersionInfo proxy in it.
    """
    emptyTuple = ('','','','')
    PDSSIGNATURE=3141592 # magic number for PDS format, as it should be
    pdsSignature=0       # signature we read from given PDS file
    fileDesc = open(fileName,'rb')
    headerHeader = array.array('I')
    headerHeader.fromfile(fileDesc, 3)
    # to handle endianess, read pdsSignature (top 3 bytes of the first word)
    pdsSignature = headerHeader[0]>>8
    pds_utils.pdsSignature=pdsSignature
    pds_utils.PDSSIGNATURE=PDSSIGNATURE
    needToSwap = 0
    if pdsSignature != PDSSIGNATURE:
        # file written on a machine with the opposite endianness
        needToSwap = 1
        headerHeader.byteswap()
    streamNames  = pds_utils.name_list_from_file(fileDesc)
    shProxyNames = pds_utils.name_list_from_file(fileDesc)
    shNames      = pds_utils.name_list_from_file(fileDesc)
    # grab the rest of the header
    restOfHeader = array.array('I')
    # the header size ignores the first 3 words in the event
    # NOTE(review): relies on Python 2 integer division for tell()/4
    restOfHeader.fromfile(fileDesc, headerHeader[2] -fileDesc.tell()/4 +3)
    if needToSwap:
        restOfHeader.byteswap()
    if restOfHeader[-1] != headerHeader[2]:
        # last header word must echo the header size
        # NOTE(review): string exceptions are illegal in Python >= 2.6
        raise "header inconsistent"
    proxiesInStreams = pds_utils.find_proxies_in_streams( restOfHeader, streamNames, shProxyNames)
    # want to only look at stuff in beginrun stream
    if not streamNames.count('beginrun'):
        return emptyTuple
    eventIndex = streamNames.index('beginrun')
    # create a structure to hold our info
    eventProxies = proxiesInStreams[eventIndex]
    accumulatedData = []  # built in step with eventProxies; not read afterwards
    foundVersionInfo= 0
    for proxy in eventProxies:
        accumulatedData.append([0,488888888,0])
        if proxy[0]=='VersionInfo':
            foundVersionInfo=1
            break
    # check if VersionInfo is present in a proxy list
    if not foundVersionInfo:
        return emptyTuple
    # start to read the rest of the file
    numEvents = 0
    nWordSize = 4 # we use to use 32-bit words, which is 4 bytes
    firstSV =""
    lastSV  =""
    versionInfoList = []
    versionInfoDict = {}
    while 1:
        try:
            endReason = ""
            # record header: stream id, run, event, uid-low, uid-high
            recordHeader = array.array('I')
            recordHeader.fromfile(fileDesc,5)
            if needToSwap:
                recordHeader.byteswap()
            uid = ((recordHeader[4]<<32)|recordHeader[3])
            if not len(firstSV):
                firstSV="%d/%d/%d"%(recordHeader[1],recordHeader[2],uid)
            lastSV="%d/%d/%d"%(recordHeader[1],recordHeader[2],uid)
            endReason = "bad Record: record size"
            recordDataLength = array.array('I')
            recordDataLength.fromfile(fileDesc,1)
            if needToSwap:
                recordDataLength.byteswap()
            if recordHeader[0]>len(streamNames)-1:
                # stream index out of range: corrupt record, stop scanning
                break
            endReason = "bad Record: record (type "+streamNames[recordHeader[0]]+") (size " + str(recordDataLength[0]) +")"
            curStream = streamNames[recordHeader[0]]
            if curStream!='beginrun':
                # NOTE(review): this continues WITHOUT seeking past the
                # record data, so the next header read starts inside this
                # record's payload — verify PDS files always start with
                # the beginrun record
                continue
            begRecPos = fileDesc.tell()
            recordData= array.array('I')
            recordData.fromfile(fileDesc, recordDataLength[0]-1)
            if needToSwap:
                recordData.byteswap()
            testLastWord= array.array('I')
            testLastWord.fromfile(fileDesc,1)
            if needToSwap:
                testLastWord.byteswap()
            endRecPos = fileDesc.tell()
            ############ remove later this block, it's a full dataRecord
            fileDesc.seek(begRecPos)
            cData = array.array('c')
            cData.fromfile(fileDesc, (recordDataLength[0])*nWordSize)
            ################
            # rewind and walk the proxies one by one
            fileDesc.seek(begRecPos)
            nReadWords = 0
            while nReadWords<(recordDataLength[0]-1):
                # let's read two words: proxy index and number of words used for Proxy's data
                proxyData = array.array('I')
                proxyData.fromfile(fileDesc,2)
                nReadWords+=2
                if needToSwap:
                    proxyData.byteswap()
                proxyIndex = proxyData[0]
                productionTag = eventProxies[proxyIndex][2]
                proxyLength= proxyData[1]
                if eventProxies[proxyIndex][0]=='VersionInfo':
                    # now let's read proxy's data
                    versionInfo, dict = proxyReader(fileDesc,needToSwap,proxyLength)
                    # merge this proxy's entries into the accumulated dict,
                    # avoiding duplicates
                    for key in dict.keys():
                        if versionInfoDict.has_key(key):
                            if not versionInfoDict[key].count(dict[key]):
                                versionInfoDict[key]+= [dict[key]]
                        else:
                            versionInfoDict[key] = [dict[key]]
                    if not versionInfoList.count((productionTag,versionInfo)):
                        versionInfoList.append((productionTag,versionInfo))
                else:
                    # not VersionInfo: skip the proxy payload in place
                    fileDesc.seek(proxyLength*4,1)
                nReadWords+=proxyLength
            # read last word of the recordData
            lastWord = array.array('I')
            lastWord.fromfile(fileDesc,1)
            if needToSwap:
                lastWord.byteswap()
            if lastWord[0]!=recordDataLength[0] or fileDesc.tell()!=endRecPos:
                # NOTE(review): string exceptions are illegal in Python >= 2.6
                raise "While decoding proxies we went too far"
            # once we reach this point, we found VersionInfo in beginrun record
            break
        except EOFError:
            break
    # highest key holds this file's own info; the remaining keys (if any)
    # hold the parents' info
    keys = versionInfoDict.keys()
    maxKey = max(keys)
    svName = versionInfoDict[maxKey][0]
    keys.remove(maxKey)
    parents= ""
    if len(keys):
        parents= versionInfoDict[max(keys)]
    return (svName,parents,versionInfoList,versionInfoDict)
def dump_old(fileName,verbose=1): """Dump content of pds location file to stdout""" # define signature of location file global PDSSIGNATURE pds_utils.PDSSIGNATURE=PDSSIGNATURE fileDesc = open(fileName,'rb') headerHeader = array.array('I') headerHeader.fromfile(fileDesc, 3) global pdsSignature pdsSignature=headerHeader[0]>>8 pds_utils.pdsSignature=pdsSignature if pdsSignature != PDSSIGNATURE: print "File was produced on another endian machine, byte swapping is enabled" # print "File signature",pdsSignature,"and it should be ",PDSSIGNATURE if pdsSignature != PDSSIGNATURE: headerHeader.byteswap() nFiles = array.array('I') nFiles.fromfile(fileDesc,1) if pdsSignature != PDSSIGNATURE: nFiles.byteswap() # print "Number of file identifiers:",nFiles fileIDs = array.array('I') fileIDs.fromfile(fileDesc,2*nFiles[0]) if pdsSignature != PDSSIGNATURE: fileIDs.byteswap() # print "fileIDs",fileIDs listOfFileIDs=[] for i in xrange(0,len(fileIDs),2): lowerMostId=fileIDs[i] upperMostId=fileIDs[i+1] fileID = ((upperMostId<<32)|lowerMostId) # print "fileID",lowerMostId,upperMostId,fileID listOfFileIDs.append(fileID) print "File identifiers:", for x in listOfFileIDs: print "%d"%x, print streamNames = pds_utils.name_list_from_file(fileDesc) print "StreamNames :", for x in streamNames: print x, print # grab the hash hash = array.array('I') hash.fromfile(fileDesc,5) print "File hash :", for tByte in hash: print tByte, # print hex(tByte), tByte print "\n" # grab the rest of the header restOfHeader = array.array('I') # the header size ignores the first 3 words in the event restOfHeader.fromfile(fileDesc, headerHeader[2] -fileDesc.tell()/4 +3) if pdsSignature != PDSSIGNATURE: restOfHeader.byteswap() # print restOfHeader if restOfHeader[-1] != headerHeader[2]: raise "header inconsistent" dataKeysInStreams = find_datakeys_in_streams(restOfHeader[:-1], streamNames) for idx in xrange(0,len(streamNames)): print "List of data keys in stream:",streamNames[idx] print 
pds_utils.printProxies(dataKeysInStreams[idx]) # print dataKeysInStreams maxDataKeys = 0 for dataKeys in dataKeysInStreams: if maxDataKeys < len(dataKeys): maxDataKeys = len(dataKeys) # determine size of a record nWordsForFileIDsInRecord = nFiles[0]*2 nCharsForDataKeysInRecord = maxDataKeys while ((nCharsForDataKeysInRecord + 4) %8): nCharsForDataKeysInRecord += 1 if verbose: print "nWordsForFileIDsInRecord : ",nWordsForFileIDsInRecord print " maxDataKeys : ",maxDataKeys print "nCharsForDataKeysInRecord: ",nCharsForDataKeysInRecord count = 0 while 1: try: endReason = "End Of File" fileOffsets = array.array('I') fileOffsets.fromfile(fileDesc,nWordsForFileIDsInRecord) if pdsSignature != PDSSIGNATURE: fileOffsets.byteswap() recordType = array.array('I') recordType.fromfile(fileDesc,1) if pdsSignature != PDSSIGNATURE: recordType.byteswap() endReason = "bad Record: fileIDS" if verbose: print "********" print "# syncValue:",count if len(streamNames): print streamNames[recordType[0]] for index in xrange(0,nFiles[0]): print "file: "+str(fileIDs[index*2])+" "+str(fileIDs[index*2+1])+"->"+str(fileOffsets[index*2])+" "+str(fileOffsets[index*2+1]) dataKeys = array.array('B') dataKeys.fromfile(fileDesc,nCharsForDataKeysInRecord) if pdsSignature != PDSSIGNATURE: dataKeys.byteswap() if verbose: print dataKeys #for index in xrange(0,nCharsForDataKeysInRecord): # print " "+str(index)+" "+str(dataKeys[index]) count+=1 except EOFError: if verbose: print endReason break
def locationFileParser(locFileName): """Parse header of location file and read data types""" # define signature of location file global PDSSIGNATURE pds_utils.PDSSIGNATURE=PDSSIGNATURE # print "locFile",locFileName locFile = open(locFileName,'rb') headerHeader = array.array('I') headerHeader.fromfile(locFile, 4) # to handle endianess, read pdsSignature global pdsSignature pdsSignature = headerHeader[0]>>8 pds_utils.pdsSignature=pdsSignature needToSwap = 0 if pdsSignature != PDSSIGNATURE: needToSwap=1 headerHeader.byteswap() # read file identifyer list nFiles = headerHeader[3] fileIDs= array.array('I') fileIDs.fromfile(locFile,2*nFiles) if pdsSignature != PDSSIGNATURE: fileIDs.byteswap() # read stream name list streamNames = pds_utils.name_list_from_file(locFile) # read hash hash = array.array('I') hash.fromfile(locFile,5) # grab the rest of the header restOfHeader = array.array('I') # the header size ignores the first 3 words in the event restOfHeader.fromfile(locFile, headerHeader[2]-locFile.tell()/4 +3) if pdsSignature != PDSSIGNATURE: restOfHeader.byteswap() if restOfHeader[-1] != headerHeader[2]: self.printError([],"Didn't parse correctly location file") sys.exit(1) dataKeysInStreams = find_datakeys_in_streams(restOfHeader[:-1], streamNames) # form dictionary of pairs {(stream,[datakeys])} oDict = {} maxDataKeys=0 for idx in xrange(0,len(streamNames)): oDict[streamNames[idx]]=dataKeysInStreams[idx] if len(dataKeysInStreams[idx])>maxDataKeys: maxDataKeys=len(dataKeysInStreams[idx]) nCharsForDataKeysInRecord = maxDataKeys while ((nCharsForDataKeysInRecord + 4) %8): nCharsForDataKeysInRecord += 1 recordSize = len(fileIDs)+1+nCharsForDataKeysInRecord # in 32-bit words # create pds file Ids pdsIDList = [] for idx in xrange(0,len(fileIDs),2): pdsID = (fileIDs[idx+1]<<32)|fileIDs[idx] pdsIDList.append(int(pdsID)) posOfFirstRecord=locFile.tell() locFile.close() return [streamNames,pdsIDList,oDict,hash,dataKeysInStreams,recordSize,posOfFirstRecord,needToSwap]
def buildLocationHeader(iPDSFileName, iFileID):
    """Build a PDS location header, from given pds file name and file id.

    Copies the stream-name section out of the PDS file, re-sorts the
    streams alphabetically, writes the sorted per-stream data keys with a
    digest spliced over a reserved 5-word slot, and pads the header so
    records begin on an 8-byte boundary.

    Returns a 12-tuple:
    (pdsFile, locationHeader, locationHeaderBeforePadding, nameWords,
     dataKeyHashArray, proxiesArray, streamNames,
     oldStreamIndex2NewIndex, newStreamIndex2OldIndex,
     whichFileForStream, whichFileForStreamFake, needToSwap)
    """
    # print "iPDSFileName",iPDSFileName
    pdsFile = open(iPDSFileName,'rb')
    pdsFileID = int(iFileID)
    pdsHeader = array.array('I')
    pdsHeader.fromfile(pdsFile, 3)
    global pdsSignature
    # signature is the top 3 bytes of the first header word
    pdsSignature=pdsHeader[0]>>8
    pds_utils.PDSSIGNATURE=PDSSIGNATURE
    pds_utils.pdsSignature=pdsSignature
    if pdsSignature != PDSSIGNATURE:
        pdsHeader.byteswap()
        needToSwap=1
    else:
        needToSwap=0
    locationHeader = array.array('I')
    locationHeader.fromlist([2951413*256]) #file id for master index
    locationHeader.append(0) #remaining number of words in of header (patched below)
    locationHeader.append(0)
    #associated file list
    fileList = array.array('I')
    # for now only set the low word for the file ID
    fileList.fromlist([1,pdsFileID,0])
    locationHeader =locationHeader + fileList
    # now get the names of the streams
    # the location file and the PDS file use the same format for the
    # stream names so just have to copy the info
    numberOfRecordNameWords = array.array('I')
    numberOfRecordNameWords.fromfile(pdsFile,1)
    if pdsSignature != PDSSIGNATURE:
        numberOfRecordNameWords.byteswap()
    #need number of names; re-position right after the 4-word prefix
    pdsFile.seek(4*4)
    nameChars = array.array('c')
    nameChars.fromfile(pdsFile,numberOfRecordNameWords[0]*4)
    if pdsSignature != PDSSIGNATURE:
        nameChars.byteswap()
    streamNames = pds_utils.name_list(nameChars)
    sortedStreamNames = list(streamNames)
    sortedStreamNames.sort()
    #build conversion from new stream index to old index
    newStreamIndex2OldIndex = []
    oldStreamIndex2NewIndex = [0]*len(streamNames)
    streamNameString =""
    for name in sortedStreamNames:
        newStreamIndex2OldIndex.append(streamNames.index(name))
        oldStreamIndex2NewIndex[newStreamIndex2OldIndex[-1] ] = len(newStreamIndex2OldIndex)-1
        streamNameString = streamNameString+name +"\0"
    # drop the trailing NUL, then pad the string to a 4-byte boundary
    streamNameString = streamNameString[:-1]
    while 0 != len(streamNameString) % 4:
        streamNameString = streamNameString + "\0"
    nameWords = array.array('I')
    nameWords.fromstring(streamNameString)
    locationHeader = locationHeader + numberOfRecordNameWords+nameWords
    #print streamNames
    shProxyNames = pds_utils.name_list_from_file(pdsFile)
    #print shProxyNames
    #print len(shProxyNames)
    shNames = pds_utils.name_list_from_file(pdsFile)
    #print shNames
    #grab the rest of the header
    restOfHeader = array.array('I')
    #the header size ignores the first 3 words in the event
    # NOTE(review): relies on Python 2 integer division for tell()/4
    restOfHeader.fromfile(pdsFile, pdsHeader[2] -pdsFile.tell()/4 +3)
    if pdsSignature != PDSSIGNATURE:
        restOfHeader.byteswap()
    #print restOfHeader
    if restOfHeader[-1] != pdsHeader[2]:
        # NOTE(review): string exceptions are illegal in Python >= 2.6
        raise "header inconsistent"
    #create the list of 'type' 'usage' 'production' tags for each stream
    # three-level comparator: compare tag fields in order (Python 2 cmp style)
    def proxySort(a,b):
        #print a, b
        temp = cmp(a[0],b[0])
        if 0 == temp:
            temp=cmp(a[1],b[1])
            if 0 == temp:
                temp=cmp(a[2],b[2])
        return temp
    proxiesInStreams = pds_utils.find_proxies_in_streams( restOfHeader, streamNames, shProxyNames)
    #sortProxiesInStreams = proxiesInStreams
    for proxies in proxiesInStreams:
        proxies.sort(proxySort)
    #reserve space for our hash (5 zero words, overwritten after hashing)
    dataKeyHashIndex = len(locationHeader)
    dataKeyHashArray = array.array('I')
    dataKeyHashArray.fromlist([0]*5)
    locationHeader += dataKeyHashArray
    maxNProxies = 0
    nProxies = array.array('I')
    nProxies.fromlist([0])
    proxiesArray = []
    # emit, for each stream (in sorted order), the proxy count followed by
    # the NUL-separated data-key strings padded to a word boundary
    for oldIndex in newStreamIndex2OldIndex:
        #print oldIndex
        proxies = proxiesInStreams[oldIndex]
        nProxies[0] = len(proxies)
        if nProxies[0] > maxNProxies:
            maxNProxies = nProxies[0]
        locationHeader +=nProxies
        datakeys = ""
        #now add each string
        proxyList= []
        for proxy in proxies:
            proxyList.append(proxy)
            for key in proxy:
                datakeys +=key+"\0"
        while( len(datakeys) % 4 ):
            datakeys +="\0"
        #print len(datakeys)
        #print datakeys
        dataKeysArray=array.array('I')
        dataKeysArray.fromstring(datakeys)
        # proxiesArray+=[dataKeysArray.tolist()]
        proxiesArray+=[proxyList]
        #nProxies[0] = len(dataKeysArray)
        #locationHeader += nProxies
        locationHeader +=dataKeysArray
    #calculate the hash over everything after the reserved slot's first word
    hash = sha.new( locationHeader[dataKeyHashIndex+1:].tostring() ).digest()
    #print sha.new( locationHeader[dataKeyHashIndex+1:].tostring() ).hexdigest()
    dataKeyHashArray = array.array('I')
    dataKeyHashArray.fromstring(hash)
    # splice the 20-byte digest over the reserved 5-word slot
    locationHeader[dataKeyHashIndex:dataKeyHashIndex+5]=dataKeyHashArray
    locationHeaderBeforePadding = locationHeader.tolist()
    #pad header so Records begin on 8 byte boundary
    if not (len(locationHeader) % 2):
        locationHeader.fromlist([0])
    # header length excludes the first 3 words; stored both at the end of
    # the header and back-patched into word 2
    headerLength = len(locationHeader)+1-3
    locationHeader.fromlist([headerLength])
    locationHeader[2] = headerLength
    #pad the max number of ASUs to be a multiple of 8
    nASUperRecord = maxNProxies
    while ((nASUperRecord + 4) % 8):
        nASUperRecord +=1
    #For each stream, create the ASU to file ID list
    # real entries get file index 0, padding entries 255; the "fake" list
    # is all 255 (no data present)
    whichFileForStream = []
    whichFileForStreamFake = []
    for proxies in proxiesInStreams:
        whichFile = array.array('B')
        fakeArray = array.array('B')
        whichFile.fromlist([0]*len(proxies)+[255]*(nASUperRecord-len(proxies)))
        for x in xrange(0,len(whichFile)):
            fakeArray.append(255)
        #print whichFile
        whichFileForStream.append(whichFile)
        whichFileForStreamFake.append(fakeArray)
    return (pdsFile,locationHeader,locationHeaderBeforePadding,nameWords,dataKeyHashArray,proxiesArray,streamNames,oldStreamIndex2NewIndex,newStreamIndex2OldIndex,whichFileForStream,whichFileForStreamFake,needToSwap)
def decodeVersionInfo(fileName):
    """Decode VersionInfo from the beginrun record of a PDS file.

    NOTE(review): this redefines the decodeVersionInfo that appears
    earlier in this module; at import time this later definition wins.

    VersionInfo consists of:
    - softwareRelease       : string
    - specificVersionName   : string
    - configurationHash     : string
    - ordinal               : unsigned int no packing
    - ancestors             : container of string

    It always grows, e.g. a post-p2 file will contain two VersionInfo's,
    one for itself and one for its parent.  The underlying algorithm
    creates a list of VersionInfo's in the following format:
    [(childTag,[softRel,svName,hash,id,parent1,parent2,...]),(parentTag,[...])]

    Returns a 4-tuple (svName, parents, versionInfoList, versionInfoDict);
    a tuple of four empty strings when there is no beginrun stream or no
    VersionInfo proxy.
    """
    emptyTuple = ('', '', '', '')
    PDSSIGNATURE = 3141592  # magic number for PDS format, as it should be
    pdsSignature = 0  # signature we read from given PDS file
    fileDesc = open(fileName, 'rb')
    headerHeader = array.array('I')
    headerHeader.fromfile(fileDesc, 3)
    # to handle endianess, read pdsSignature (top 3 bytes of the first word)
    pdsSignature = headerHeader[0] >> 8
    pds_utils.pdsSignature = pdsSignature
    pds_utils.PDSSIGNATURE = PDSSIGNATURE
    needToSwap = 0
    if pdsSignature != PDSSIGNATURE:
        # file written on a machine with the opposite endianness
        needToSwap = 1
        headerHeader.byteswap()
    streamNames = pds_utils.name_list_from_file(fileDesc)
    shProxyNames = pds_utils.name_list_from_file(fileDesc)
    shNames = pds_utils.name_list_from_file(fileDesc)
    # grab the rest of the header
    restOfHeader = array.array('I')
    # the header size ignores the first 3 words in the event
    # NOTE(review): relies on Python 2 integer division for tell()/4
    restOfHeader.fromfile(fileDesc, headerHeader[2] - fileDesc.tell() / 4 + 3)
    if needToSwap:
        restOfHeader.byteswap()
    if restOfHeader[-1] != headerHeader[2]:
        # NOTE(review): string exceptions are illegal in Python >= 2.6
        raise "header inconsistent"
    proxiesInStreams = pds_utils.find_proxies_in_streams(
        restOfHeader, streamNames, shProxyNames)
    # want to only look at stuff in beginrun stream
    if not streamNames.count('beginrun'):
        return emptyTuple
    eventIndex = streamNames.index('beginrun')
    # create a structure to hold our info
    eventProxies = proxiesInStreams[eventIndex]
    accumulatedData = []  # built in step with eventProxies; not read afterwards
    foundVersionInfo = 0
    for proxy in eventProxies:
        accumulatedData.append([0, 488888888, 0])
        if proxy[0] == 'VersionInfo':
            foundVersionInfo = 1
            break
    # check if VersionInfo is present in a proxy list
    if not foundVersionInfo:
        return emptyTuple
    # start to read the rest of the file
    numEvents = 0
    nWordSize = 4  # we use to use 32-bit words, which is 4 bytes
    firstSV = ""
    lastSV = ""
    versionInfoList = []
    versionInfoDict = {}
    while 1:
        try:
            endReason = ""
            # record header: stream id, run, event, uid-low, uid-high
            recordHeader = array.array('I')
            recordHeader.fromfile(fileDesc, 5)
            if needToSwap:
                recordHeader.byteswap()
            uid = ((recordHeader[4] << 32) | recordHeader[3])
            if not len(firstSV):
                firstSV = "%d/%d/%d" % (recordHeader[1], recordHeader[2], uid)
            lastSV = "%d/%d/%d" % (recordHeader[1], recordHeader[2], uid)
            endReason = "bad Record: record size"
            recordDataLength = array.array('I')
            recordDataLength.fromfile(fileDesc, 1)
            if needToSwap:
                recordDataLength.byteswap()
            if recordHeader[0] > len(streamNames) - 1:
                # stream index out of range: corrupt record, stop scanning
                break
            endReason = "bad Record: record (type " + streamNames[
                recordHeader[0]] + ") (size " + str(recordDataLength[0]) + ")"
            curStream = streamNames[recordHeader[0]]
            if curStream != 'beginrun':
                # NOTE(review): continues WITHOUT seeking past the record
                # data — verify PDS files always start with beginrun
                continue
            begRecPos = fileDesc.tell()
            recordData = array.array('I')
            recordData.fromfile(fileDesc, recordDataLength[0] - 1)
            if needToSwap:
                recordData.byteswap()
            testLastWord = array.array('I')
            testLastWord.fromfile(fileDesc, 1)
            if needToSwap:
                testLastWord.byteswap()
            endRecPos = fileDesc.tell()
            ############ remove later this block, it's a full dataRecord
            fileDesc.seek(begRecPos)
            cData = array.array('c')
            cData.fromfile(fileDesc, (recordDataLength[0]) * nWordSize)
            ################
            # rewind and walk the proxies one by one
            fileDesc.seek(begRecPos)
            nReadWords = 0
            while nReadWords < (recordDataLength[0] - 1):
                # let's read two words: proxy index and number of words used for Proxy's data
                proxyData = array.array('I')
                proxyData.fromfile(fileDesc, 2)
                nReadWords += 2
                if needToSwap:
                    proxyData.byteswap()
                proxyIndex = proxyData[0]
                productionTag = eventProxies[proxyIndex][2]
                proxyLength = proxyData[1]
                if eventProxies[proxyIndex][0] == 'VersionInfo':
                    # now let's read proxy's data
                    versionInfo, dict = proxyReader(fileDesc, needToSwap, proxyLength)
                    # merge this proxy's entries, avoiding duplicates
                    for key in dict.keys():
                        if versionInfoDict.has_key(key):
                            if not versionInfoDict[key].count(dict[key]):
                                versionInfoDict[key] += [dict[key]]
                        else:
                            versionInfoDict[key] = [dict[key]]
                    if not versionInfoList.count((productionTag, versionInfo)):
                        versionInfoList.append((productionTag, versionInfo))
                else:
                    # not VersionInfo: skip the proxy payload in place
                    fileDesc.seek(proxyLength * 4, 1)
                nReadWords += proxyLength
            # read last word of the recordData
            lastWord = array.array('I')
            lastWord.fromfile(fileDesc, 1)
            if needToSwap:
                lastWord.byteswap()
            if lastWord[0] != recordDataLength[0] or fileDesc.tell() != endRecPos:
                # NOTE(review): string exceptions are illegal in Python >= 2.6
                raise "While decoding proxies we went too far"
            # once we reach this point, we found VersionInfo in beginrun record
            break
        except EOFError:
            break
    # highest key holds this file's own info; the remaining keys (if any)
    # hold the parents' info
    keys = versionInfoDict.keys()
    maxKey = max(keys)
    svName = versionInfoDict[maxKey][0]
    keys.remove(maxKey)
    parents = ""
    if len(keys):
        parents = versionInfoDict[max(keys)]
    return (svName, parents, versionInfoList, versionInfoDict)
def dump_old(fileName, verbose=1):
    """Dump content of pds location file to stdout.

    NOTE(review): this redefines the dump_old that appears earlier in
    this module; at import time this later definition wins.

    Prints file identifiers, stream names, the stored header hash and the
    per-stream data keys, then iterates over the fixed-size records until
    EOF, printing each sync value when verbose is true.
    """
    # define signature of location file
    global PDSSIGNATURE
    pds_utils.PDSSIGNATURE = PDSSIGNATURE
    fileDesc = open(fileName, 'rb')
    headerHeader = array.array('I')
    headerHeader.fromfile(fileDesc, 3)
    global pdsSignature
    # signature is the top 3 bytes of the first header word
    pdsSignature = headerHeader[0] >> 8
    pds_utils.pdsSignature = pdsSignature
    if pdsSignature != PDSSIGNATURE:
        print "File was produced on another endian machine, byte swapping is enabled"
    # print "File signature",pdsSignature,"and it should be ",PDSSIGNATURE
    if pdsSignature != PDSSIGNATURE:
        headerHeader.byteswap()
    nFiles = array.array('I')
    nFiles.fromfile(fileDesc, 1)
    if pdsSignature != PDSSIGNATURE:
        nFiles.byteswap()
    # print "Number of file identifiers:",nFiles
    # two 32-bit words (low, high) per file identifier
    fileIDs = array.array('I')
    fileIDs.fromfile(fileDesc, 2 * nFiles[0])
    if pdsSignature != PDSSIGNATURE:
        fileIDs.byteswap()
    # print "fileIDs",fileIDs
    listOfFileIDs = []
    for i in xrange(0, len(fileIDs), 2):
        lowerMostId = fileIDs[i]
        upperMostId = fileIDs[i + 1]
        # combine the word pair into one 64-bit identifier
        fileID = ((upperMostId << 32) | lowerMostId)
        # print "fileID",lowerMostId,upperMostId,fileID
        listOfFileIDs.append(fileID)
    print "File identifiers:",
    for x in listOfFileIDs:
        print "%d" % x,
    print
    streamNames = pds_utils.name_list_from_file(fileDesc)
    print "StreamNames :",
    for x in streamNames:
        print x,
    print
    # grab the hash (5 words = 160 bits)
    hash = array.array('I')
    hash.fromfile(fileDesc, 5)
    print "File hash    :",
    for tByte in hash:
        print tByte,
        # print hex(tByte), tByte
    print "\n"
    # grab the rest of the header
    restOfHeader = array.array('I')
    # the header size ignores the first 3 words in the event
    # NOTE(review): relies on Python 2 integer division for tell()/4
    restOfHeader.fromfile(fileDesc, headerHeader[2] - fileDesc.tell() / 4 + 3)
    if pdsSignature != PDSSIGNATURE:
        restOfHeader.byteswap()
    # print restOfHeader
    if restOfHeader[-1] != headerHeader[2]:
        # NOTE(review): string exceptions are illegal in Python >= 2.6
        raise "header inconsistent"
    dataKeysInStreams = find_datakeys_in_streams(restOfHeader[:-1], streamNames)
    for idx in xrange(0, len(streamNames)):
        print "List of data keys in stream:", streamNames[idx]
        print pds_utils.printProxies(dataKeysInStreams[idx])
    # print dataKeysInStreams
    maxDataKeys = 0
    for dataKeys in dataKeysInStreams:
        if maxDataKeys < len(dataKeys):
            maxDataKeys = len(dataKeys)
    # determine size of a record
    nWordsForFileIDsInRecord = nFiles[0] * 2
    # pad the data-key byte count so each record lands on an 8-byte boundary
    nCharsForDataKeysInRecord = maxDataKeys
    while ((nCharsForDataKeysInRecord + 4) % 8):
        nCharsForDataKeysInRecord += 1
    if verbose:
        print "nWordsForFileIDsInRecord : ", nWordsForFileIDsInRecord
        print "              maxDataKeys: ", maxDataKeys
        print "nCharsForDataKeysInRecord: ", nCharsForDataKeysInRecord
    count = 0
    # scan fixed-size records until EOF
    while 1:
        try:
            endReason = "End Of File"
            fileOffsets = array.array('I')
            fileOffsets.fromfile(fileDesc, nWordsForFileIDsInRecord)
            if pdsSignature != PDSSIGNATURE:
                fileOffsets.byteswap()
            recordType = array.array('I')
            recordType.fromfile(fileDesc, 1)
            if pdsSignature != PDSSIGNATURE:
                recordType.byteswap()
            endReason = "bad Record: fileIDS"
            if verbose:
                print "********"
                print "# syncValue:", count
                if len(streamNames):
                    print streamNames[recordType[0]]
                # NOTE(review): extent of this verbose block recovered from
                # mangled formatting — confirm the per-file print belongs here
                for index in xrange(0, nFiles[0]):
                    print "file: " + str(fileIDs[index * 2]) + " " + str(
                        fileIDs[index * 2 + 1]) + "->" + str(
                            fileOffsets[index * 2]) + " " + str(
                                fileOffsets[index * 2 + 1])
            # the data-key bytes must be consumed even when not printed,
            # to keep the file position aligned to the next record
            dataKeys = array.array('B')
            dataKeys.fromfile(fileDesc, nCharsForDataKeysInRecord)
            if pdsSignature != PDSSIGNATURE:
                dataKeys.byteswap()
            if verbose:
                print dataKeys
            #for index in xrange(0,nCharsForDataKeysInRecord):
            #    print " "+str(index)+" "+str(dataKeys[index])
            count += 1
        except EOFError:
            if verbose:
                print endReason
            break
def pdsParser(file, what=""):
    """PDS file parser. Return a list of run/uid/sync.values/proxies in given file.

    NOTE(review): the parameter name 'file' shadows the builtin; it is
    part of the public signature (keyword callers), so it is left as is.

    When 'what' is "run", "uid", "syncValue" or "proxies" only that list
    is returned; otherwise a combined list of all collected data plus the
    header name sections and posOfLastHeaderWords is returned.
    """
    fileDesc = open(file, 'rb')
    headerHeader = array.array('I')
    headerHeader.fromfile(fileDesc, 3)
    # global pdsSignature
    # signature is the top 3 bytes of the first header word
    pdsSignature = headerHeader[0] >> 8
    pds_utils.PDSSIGNATURE = PDSSIGNATURE
    pds_utils.pdsSignature = pdsSignature
    if pdsSignature != PDSSIGNATURE:
        headerHeader.byteswap()
    # invoke name_list_from_file with additional argument 1 which means
    # return number of words for those names
    streamNames, streamNamesWords = pds_utils.name_list_from_file(fileDesc, 1)
    shProxyNames, shProxyNamesWords = pds_utils.name_list_from_file(fileDesc, 1)
    shNames, shNamesWords = pds_utils.name_list_from_file(fileDesc, 1)
    #grab the rest of the header
    restOfHeader = array.array('I')
    #the header size ignores the first 3 words in the event
    # NOTE(review): relies on Python 2 integer division for tell()/4
    restOfHeader.fromfile(fileDesc, headerHeader[2] - fileDesc.tell() / 4 + 3)
    if pdsSignature != PDSSIGNATURE:
        restOfHeader.byteswap()
    if restOfHeader[-1] != headerHeader[2]:
        # NOTE(review): string exceptions are illegal in Python >= 2.6
        raise "header inconsistent"
    proxiesInStreams = pds_utils.find_proxies_in_streams(
        restOfHeader, streamNames, shProxyNames)
    # define proxy dictionary dict={'stream':listOfProxiesInThatStream}
    # NOTE(review): loop variable 'str' shadows the builtin inside the loop
    pList = []
    for str in streamNames:
        strIndex = streamNames.index(str)
        pList.append((str, proxiesInStreams[strIndex]))
    posOfLastHeaderWords = fileDesc.tell()
    #start to read the rest of the file
    runList = []
    uidList = []
    syncList = []
    while 1:
        try:
            endReason = ""
            # record header: stream id, run, event, uid-low, uid-high
            recordHeader = array.array('I')
            recordHeader.fromfile(fileDesc, 5)
            if pdsSignature != PDSSIGNATURE:
                recordHeader.byteswap()
            # read stream id and convert it to stream name
            streamId = int(recordHeader[0])
            stream = streamNames[streamId]
            # read uid (64-bit, from low/high words)
            uid = ((recordHeader[4] << 32) | recordHeader[3])
            # form syncValue
            syncValue = (recordHeader[1], recordHeader[2], uid, stream)
            syncList.append(syncValue)
            # read run record; only record a new (run, uid) pair when it
            # differs from the last one seen
            runNumber = recordHeader[1]
            if len(runList) == 0 or (uid != uidList[-1] or runNumber != runList[-1]):
                runList.append(runNumber)
                uidList.append(uid)
            # read the record data
            recordDataSize = array.array('I')
            recordDataSize.fromfile(fileDesc, 1)
            if pdsSignature != PDSSIGNATURE:
                recordDataSize.byteswap()
            # print "recordDataSize",recordDataSize
            recordData = array.array('I')
            recordData.fromfile(fileDesc, recordDataSize[0] - 1)
            if pdsSignature != PDSSIGNATURE:
                recordData.byteswap()
            # trailing word must echo the record-data size
            recordDataEnd = array.array('I')
            recordDataEnd.fromfile(fileDesc, 1)
            if pdsSignature != PDSSIGNATURE:
                recordDataEnd.byteswap()
            if recordDataSize[0] != recordDataEnd[0]:
                print "Error of reading record data"
                sys.exit(1)
        except EOFError:
            break
    # runList.sort()
    # uidList.sort()
    # syncList.sort()
    pList.sort()
    # short-circuit returns for single-list queries
    if what == "run":
        return runList
    elif what == "uid":
        return uidList
    elif what == "syncValue":
        return syncList
    elif what == "proxies":
        return pList
    returnList = [runList, uidList, pList, syncList]
    returnList += [streamNames, streamNamesWords]
    returnList += [shProxyNames, shProxyNamesWords]
    returnList += [shNames, shNamesWords]
    returnList += [proxiesInStreams, posOfLastHeaderWords]
    return returnList