def sambayear(year):
    """Return the one-letter Samba date code for a year.

    The year 2000 maps to 'a', 2001 to 'b', and so on; for example
    2012 maps to 'm'.

    The argument is first passed through util.formatvalue.  The result
    must be an integer greater than or equal to 2000 and less than 2026,
    otherwise TypeError is raised (for a wrong type and for an
    out-of-range value alike).
    """
    year = util.formatvalue(year)
    expmsg = '%s, must be integer greater than or equal to 2000 and less than 2026' % year

    # Strings and floats are rejected before the range is looked at.
    if isinstance(year, (str, float)) or not 2000 <= year < 2026:
        raise TypeError(expmsg)

    offset = year - 2000
    return chr(ord('a') + offset)
def sambamonth(month):
    """Return the one-letter Samba date code for a month.

    Month 1 maps to 'a', month 2 to 'b', ... month 12 to 'l'.

    The argument is first passed through util.formatvalue.  The result
    must be an integer from 1 to 12 inclusive, otherwise TypeError is
    raised (for a wrong type and for an out-of-range value alike).
    """
    month = util.formatvalue(month)
    expmsg = '%s, must be integer between 1 and 12, inclusively' % month

    # Strings and floats are rejected before the range is looked at.
    if isinstance(month, (str, float)) or not 1 <= month <= 12:
        raise TypeError(expmsg)

    offset = month - 1
    return chr(ord('a') + offset)
def readsambafileheader(sambafile):
    """Read the file-level header at the top of a Samba data file.

    sambafile is an open file object positioned at the start of the file.

    Returns False when the first line is not '* Archive SAMBA' or the
    second line does not start with '* Version '.  Otherwise returns a
    dictionary of the 'key = value' lines found before the first line
    ending in '* ----------'.  Values go through formatvalue; the 'Date'
    entry is split on '/' and stored as a {'year', 'month', 'day'}
    dictionary with 2000 added to the year.

    If the file ends before the terminator line is found, a message is
    printed and the process exits via sys.exit(-1).
    """
    # The first two lines must identify the file as a Samba archive.
    if sambafile.readline().rstrip() != '* Archive SAMBA':
        return False  # this doesn't appear to be a SAMBA data file
    if not sambafile.readline().rstrip().startswith('* Version '):
        return False  # force the file to hold this formatting

    header = {}
    while True:
        line = sambafile.readline()

        if line.rstrip().endswith('* ----------'):
            print('Finished reading Samba File Header')
            return header

        if line == '':
            # readline() only returns '' at end of file
            print('prematurely found the end of the file while reading the file header')
            sys.exit(-1)

        # comments and lines that are not 'key = value' carry no data
        if line.startswith('#') or '=' not in line:
            continue

        fields = line.rstrip().split('=')
        key = fields[0].strip()
        # anything after a '#' on the value side is a trailing comment
        value = formatvalue(fields[1].split('#')[0].strip())

        if key == 'Date':
            date = value.split('/')
            value = {'year': int(date[0]) + 2000,
                     'month': int(date[1]),
                     'day': int(date[2])}

        header[key] = value
def readrunheader(sambafile):
    """Read the run header of a Samba data file into _localRunDict.

    sambafile is an open file object; it must have a .name attribute,
    whose basename is used to build the document '_id'.

    The file is scanned forward until the line
    '# ===== Entete de run =====' is found.  From there every
    'key = value' line (value passed through formatvalue, text after a
    '#' on the value side ignored) is stored in the module-level
    _localRunDict until the line '* Donnees' is reached.

    The caller is expected to have initialised _localRunDict to a
    dictionary before calling this function.

    Returns the _localRunDict dictionary.  If the end of the file is
    reached first, a message is printed and the process exits via
    sys.exit(-1).
    """
    global _localRunDict

    # Phase 1: skip ahead to the start of the run header.
    while True:
        rawline = sambafile.readline()
        if rawline == '':
            # readline() returns '' only at end of file.  The test is made
            # on the raw line (before rstrip) so that a blank line in the
            # file is not mistaken for the end of the file.
            print('found end of file prematurely while readin the samba run header.')
            sys.exit(-1)

        if rawline.rstrip() == '# ===== Entete de run =====':
            runfile = os.path.basename(sambafile.name)
            print('Found start of run %s. Creating Run Document' % runfile)
            _localRunDict['_id'] = 'run_' + runfile + '_kdatascript'
            break

    # Phase 2: collect key/value pairs until the data section starts.
    while True:
        rawline = sambafile.readline()
        if rawline == '':
            # we shouldn't reach the end of the file already
            print('Prematurely found an empty line while reading the Run Header')
            sys.exit(-1)

        line = rawline.rstrip()
        if line.startswith('#'):
            continue  # comment line

        if line == '* Donnees':
            print('Finished Run Header %s' % _localRunDict['_id'])
            return _localRunDict

        if '=' in line:
            alist = line.split('=')
            key = alist[0].strip()
            value = formatvalue(alist[1].split('#')[0].strip())
            _localRunDict[key] = value
def uploadFile(fname, uri, dbname):
    """Upload the rows of a CSV bolometer map to a database in bulk.

    fname  -- path of the CSV file (read with the 'excel' dialect)
    uri    -- server address handed to Server()
    dbname -- database name; created if it does not exist

    Each row is converted with parseDoc and must contain the columns
    'bolometer', 'channels', 'repartition_number', 'year', 'month' and
    'day'; otherwise a message is printed and the process exits via
    sys.exit(1).  Rows whose 'bolometer' field is 0 or '' are skipped.
    A 'Run' column is also read from every kept row.

    For every kept row the bolometer name is upper-cased, 'RUN' is
    stripped from the Run field, the year/month/day columns are folded
    into a 'date_valid' dictionary (plus unix-time and ISO forms), the
    filing metadata is added, parsechannels() is applied, and the
    document is queued.  All queued documents are saved with one
    db.bulk_save() call at the end.
    """
    print('Upload contents of %s to %s/%s' % (fname, uri, dbname))

    # connect to the db
    theServer = Server(uri)
    db = theServer.get_or_create_db(dbname)

    # enforce some sort of schema: every document must carry these keys
    requiredSet = set(['bolometer', 'channels', 'repartition_number',
                       'year', 'month', 'day'])

    # used for bulk uploading, which is faster than saving one by one
    docs = list()

    with open(fname, 'rU') as csvfile:
        # each line in the reader object is a document that will be
        # uploaded to the database.
        for doc in DictReader(csvfile, dialect='excel'):
            # parse the doc so that integers and floats are not stored
            # as strings
            newdoc = parseDoc(doc)

            # NOTE: written as an explicit subset test.  The chained form
            # "a < b is False" means "(a < b) and (b is False)" and can
            # never be true, so it would let incomplete rows through.
            if not requiredSet.issubset(newdoc.keys()):
                print('Quitting! Your CVS file map MUST have the following columns')
                print(', '.join(sorted(requiredSet)))
                sys.exit(1)

            # if the bolometer field is empty, skip this (empty) row
            if newdoc['bolometer'] == 0 or newdoc['bolometer'] == '':
                continue

            # make sure bolometer characters are all uppercase to be
            # consistent with past analysis and Samba files
            newdoc['bolometer'] = str(newdoc['bolometer']).upper()

            # strip "RUN" from the Run field
            newdoc['Run'] = formatvalue(str(newdoc['Run']).replace('RUN', '').strip())

            # the separate 'date' column is redundant; tolerate its absence
            newdoc.pop('date', None)

            # reformat the date to a single dictionary.  The three keys
            # are guaranteed to exist by the schema check above.
            date_valid = {}
            for dk in ('year', 'month', 'day'):
                date_valid[dk] = newdoc.pop(dk)
            newdoc['date_valid'] = date_valid

            validdate = datetime.datetime(date_valid['year'],
                                          date_valid['month'],
                                          date_valid['day'],
                                          tzinfo=pytz.utc)
            newdoc['date_valid_unixtime'] = calendar.timegm(validdate.utctimetuple())
            newdoc['date_valid_isoformat'] = validdate.isoformat()

            # add the type, author, content, and date_filed fields.
            newdoc['type'] = 'bbolo_position_map'
            newdoc['author'] = 'Bernard Paul'
            newdoc['content'] = 'Bolometer and readout electronics mapping'

            filed = datetime.datetime.utcnow()
            newdoc['date_filed'] = {'year': filed.year, 'month': filed.month,
                                    'day': filed.day, 'hour': filed.hour,
                                    'minute': filed.minute, 'second': filed.second}
            newdoc['date_filed_unixtime'] = time.time()
            # add the +0:00 to indicate clearly that this is UTC
            newdoc['date_filed_isoformat'] = filed.isoformat() + '+0:00'

            # parse the channels field into an array holding the channels
            # that are read out
            parsechannels(newdoc)

            docs.append(newdoc)
            print('%s %s %s' % (newdoc['bolometer'], newdoc['channels'],
                                newdoc['date_valid_isoformat']))

    db.bulk_save(docs)
def uploadFile(filename, uri, dbname, overWrite = False):
    """Build the DAQ run document for one Samba data file and save it.

    filename  -- path of the Samba data file
    uri       -- server address handed to Server()
    dbname    -- database name; created if it does not exist
    overWrite -- when True, an existing document with the same '_id' is
                 replaced; when False (default) it is left untouched

    The module-level _localRunDict is reset and then filled from, in
    order: the run header (readrunheader), bookkeeping fields set here,
    the file header (readsambafileheader; existing keys are not
    overwritten), the detector headers ('Detecteurs') and the channel
    headers ('Voies').

    Returns False when the file name is not a valid Samba data file
    name, when the file header is not recognised, or when the document
    already exists and overWrite is False.  Otherwise returns the 'ok'
    field of the db.save_doc() result.
    """
    global _localRunDict

    def _channelname(text):
        # Extract the quoted channel name from a '* Voie "name":' line;
        # returns '' when the line does not carry one.
        start = text.find('* Voie')
        if start == -1:
            return ''
        voiepart = text[start:]
        quote = voiepart.find('"')
        if quote == -1:
            return ''
        return voiepart[quote:].strip('":\n')

    if sut.isvalidsambadatafilename(os.path.basename(filename)) is False:
        return False

    theServer = Server(uri)
    db = theServer.get_or_create_db(dbname)

    # read the run header; readrunheader fills _localRunDict in place
    _localRunDict = {}
    sambafile = open(filename)
    try:
        readrunheader(sambafile)
        runname = os.path.basename(sambafile.name)
    finally:
        sambafile.close()

    _localRunDict['author'] = 'Samba'
    _localRunDict['content'] = 'Samba DAQ document for a particular run. Use this database entry to track the progress of the processing of this data'
    _localRunDict['type'] = 'daqdocument'
    _localRunDict['date_uploaded'] = time.time()

    runsplit = runname.split('_')
    _localRunDict['file'] = os.path.realpath(filename)
    # NOTE(review): the key says "last modified" but getctime is used
    # (on Unix that is the metadata-change time) -- confirm whether
    # os.path.getmtime was intended before changing stored values.
    _localRunDict['file_lastmodified'] = os.path.getctime(filename)
    _localRunDict['run_name'] = formatvalue(runsplit[0])
    _localRunDict['file_number'] = int(runsplit[1])

    # reopen the file, just to make it easy to get back to the start
    sambafile = open(filename)
    try:
        # read the samba file header
        sambaheader = readsambafileheader(sambafile)
        if sambaheader is False:
            # not a recognisable Samba file header; nothing to upload
            print('file header is not a valid Samba file header! exiting without uploading to database')
            return False

        # add the key/values to the document without clobbering the
        # values that are already there
        for k, v in sambaheader.items():
            if k not in _localRunDict:
                _localRunDict[k] = v

        # now, loop through and read the bolometer headers
        boloArray = list()
        lastline = ''
        while True:
            boloheader = readboloheader(sambafile)
            if isinstance(boloheader, dict):
                boloArray.append(boloheader)
                continue
            if isinstance(boloheader, str):
                lastline = boloheader
            print('Not a Dictionary. We are done reading the bolometer headers.')
            break

        _localRunDict['Detecteurs'] = boloArray

        # now read through the channel configuration values.
        # The loop above quits when readboloheader doesn't return a
        # dictionary.  Instead, it returns the next line in the header,
        # which should be the start of the channel configurations
        # (assuming that Samba doesn't change its format).
        channelArray = list()
        channelName = _channelname(lastline)
        while True:
            channelheader, nextline, morechannels = readchannelheader(sambafile, channelName)
            channelName = _channelname(nextline)

            if isinstance(channelheader, dict):
                channelArray.append(channelheader)
            else:
                print('Read Channel Header didn\'t return a dictionary.')

            if morechannels == False:
                # okay, this tells us that we're done
                print('Channel Header output False - Done reading Channel Headers.')
                break
    finally:
        sambafile.close()

    _localRunDict['Voies'] = channelArray
    _localRunDict['status'] = 'closed'
    _localRunDict['hostipaddress'] = socket.gethostbyname(socket.gethostname())
    _localRunDict['hostname'] = socket.gethostname()

    # don't allow this script to rewrite a doc to the database!
    # if you want to do that, then delete the doc you want to recreate
    # or use the overWrite option
    if db.doc_exist(_localRunDict['_id']):
        if not overWrite:
            print('doc exists on database! exiting without uploading to database')
            return False
        print('doc exists on database, but overWrite was true. so i overwrite it')
        _localRunDict['_rev'] = db.get_rev(_localRunDict['_id'])

    res = db.save_doc(_localRunDict)
    return res['ok']
def readchannelheader(sambafile, voie):
    """Read one channel ('Voie') header from a Samba data file.

    sambafile -- open file object positioned just after the
                 '* Voie "name":' line of the channel to read
    voie      -- name of that channel; it has to be supplied by the
                 caller because the line carrying it has already been
                 consumed (by readboloheader or by the previous call)

    Returns a 3-tuple (header, lastline, more):
      header   -- dictionary with 'Voie', 'bolometer' and every
                  'key = value' line of the channel (values passed
                  through formatvalue), or the string 'notDict' when
                  the end of the file was reached
      lastline -- the last line read, which should hold the name of the
                  next channel when more is True
      more     -- True when another '* Voie' header follows, False when
                  '* Run' or the end of the file was reached
    """
    header = {}
    header['Voie'] = voie
    header['bolometer'] = getBolometerName(voie)

    while True:
        rawline = sambafile.readline()
        if rawline == '':
            # readline() returns '' only at end of file; testing the raw
            # line keeps a blank line from being mistaken for it.
            print('Hey! Reached end of file when reading the channel header!?!')
            return ('notDict', rawline, False)
        line = rawline.rstrip()

        if line == '* Filtre:':
            # The filter content is skipped.  It may contain end-of-line
            # bytes, so keep reading until the next section marker shows
            # up somewhere in a line.
            line = sambafile.readline()
            while True:
                if line.find('* Voie') != -1:
                    print('Finished reading Channel header (end of filter * Voie)')
                    return (header, line, True)
                if line.find('* Run') != -1:
                    print('Finished reading Channel header (end of filter * Run)')
                    return (header, line, False)
                if line == '':
                    # end of file inside the filter: stop instead of
                    # looping forever on empty reads
                    print('Hey! Reached end of file when reading the channel header!?!')
                    return ('notDict', line, False)
                line = sambafile.readline()

        if line.startswith('* Voie'):
            print('Finished reading Channel header (* Voie)')
            return (header, line, True)

        if line.startswith('* Run'):
            print('Finished reading Channel header (* Run)')
            return (header, line, False)

        if line.startswith('#'):
            continue  # skip lines that are comments

        if '=' in line:  # make sure its a key = value line
            alist = line.split('=')
            key = alist[0].strip()
            # text after a '#' on the value side is a trailing comment
            value = formatvalue(alist[1].split('#')[0].strip())
            header[str(key)] = value
def readboloheader(sambafile):
    """Read one detector ('* Detecteur') header from a Samba data file.

    sambafile is an open file object positioned at the first line of the
    section to read.

    Returns:
      * the first line read, unchanged, when it does not start with
        '* Detecteur' (the caller uses it as the start of the next
        section);
      * False when the end of the file is reached inside the header;
      * otherwise a dictionary with 'detector', 'bolometer' and every
        'key = value' line found before the line ending in
        '* ----------' (values passed through formatvalue).  A
        non-empty 'Bolo.reglages = (' entry is stored as a nested
        dictionary built from its '{ key := value }' lines.
    """
    firstline = sambafile.readline()
    if not firstline.strip().startswith('* Detecteur'):
        # this doesn't appear to be a Bolometer Configuration section
        return firstline

    header = {}
    detector = firstline.split()[2]
    header['detector'] = detector
    header['bolometer'] = getBolometerName(detector)

    while True:
        line = sambafile.readline()

        if line.rstrip().endswith('* ----------'):
            print('Finished Bolo Header')
            return header

        if line == '':
            # we shouldn't reach the end of the file already
            print('Hey! Reached end of file when reading the samba detector header!')
            return False

        if line.startswith('#'):
            continue  # skip lines that are comments

        line = line.rstrip()
        if '=' not in line:
            continue  # not a key = value line

        alist = line.split('=')
        key = alist[0].strip()

        if key == 'Bolo.reglages' and alist[1].strip() != '()':
            # special case: a multi-line list of '{ name := value },'
            # entries closed by a line starting with ')'
            rootval = dict()
            while True:
                nline = sambafile.readline()

                if nline == '':
                    # end of file inside the list: stop instead of
                    # looping forever on empty reads
                    print('Hey! Reached end of file when reading the samba detector header!')
                    return False

                if nline.rstrip().endswith('* ----------'):
                    # The section terminator has just been consumed, so
                    # the header is complete.  Returning here keeps the
                    # outer loop from reading on into the next section.
                    header[str(key)] = rootval
                    print('Finished Bolo Header')
                    return header

                if nline.strip().startswith(')'):
                    break  # found end of bolo.reglages

                if nline.lstrip().startswith('{') and ':=' in nline:
                    valuelist = nline.strip().split(':=')
                    subkey = valuelist[0].strip(' {').strip()
                    subvalue = valuelist[1].strip(' },').strip()
                    rootval[str(subkey)] = formatvalue(subvalue)

            header[str(key)] = rootval
        else:
            # text after a '#' on the value side is a trailing comment
            value = formatvalue(alist[1].split('#')[0].strip())
            header[str(key)] = value