def load_multidataset(filename, cache=False):
    """
    Load a MultiDataSet from a file.

    First tries to load the file as a saved MultiDataSet object, then as a
    standard text-formatted MultiDataSet.

    Parameters
    ----------
    filename : string
        The name of the file

    cache : bool, optional
        When set to True, a pickle file with the name filename + ".cache"
        is searched for and loaded instead of filename if it exists
        and is newer than filename.  If no cache file exists or one
        exists but it is older than filename, a cache file will be
        written after loading from filename.

    Returns
    -------
    MultiDataSet
    """
    try:
        # A saved MultiDataSet object can be loaded directly.
        return _objs.MultiDataSet(fileToLoadFrom=filename)
    except Exception:
        # Not a saved object: fall through to the cache / text-format path.
        # (Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not swallowed.)
        pass

    cache_filename = filename + ".cache"
    if cache:
        # NOTE(review): per the docstring the cache is a pickle -- only use
        # cache files that come from a trusted location.
        cache_is_fresh = (
            _os.path.exists(cache_filename)
            and _os.path.getmtime(filename) < _os.path.getmtime(cache_filename)
        )
        if cache_is_fresh:
            try:
                # Two spaces after the colon reproduce the output of the
                # original two-argument print statement.
                print("Loading from cache file:  %s" % cache_filename)
                return _objs.MultiDataSet(fileToLoadFrom=cache_filename)
            except Exception:
                # Best effort: an unreadable cache is regenerated below.
                print("Failed to load from cache file")
        else:
            print("Cache file not found or is too old -- one will be "
                  "created after loading is completed")

    # Otherwise the file must be in the standard text dataset format.
    parser = _stdinput.StdInputParser()
    mds = parser.parse_multidatafile(filename)

    if cache:
        print("Writing cache file (to speed future loads):  %s"
              % cache_filename)
        mds.save(cache_filename)

    return mds
def load_gatestring_list(filename, readRawStrings=False):
    """
    Load a gate string list from a file, formatted using the standard
    text-format.

    Parameters
    ----------
    filename : string
        The name of the file

    readRawStrings : boolean
        If True, gate strings are not converted to tuples of gate labels.
        The file is then read line by line without using the standard
        parser.

    Returns
    -------
    list
        When readRawStrings is False, the result of
        StdInputParser.parse_stringfile (a list of GateString objects).
        When readRawStrings is True, a list of the whitespace-stripped
        lines of the file, omitting blank lines and lines whose first
        character is '#'.
    """
    if not readRawStrings:
        std = _stdinput.StdInputParser()
        return std.parse_stringfile(filename)

    rawList = []
    # The context manager guarantees the file is closed, even on error.
    with open(filename, 'r') as infile:
        for line in infile:
            stripped = line.strip()
            # Skip blank lines and comment lines ('#' in the first column).
            if not stripped or line[0] == '#':
                continue
            rawList.append(stripped)
    return rawList
def write_multidataset(filename, multidataset, gatestring_list=None,
                       spamLabelOrder=None):
    """
    Write a text-formatted multi-dataset file.

    The file starts with a "## Columns = ..." header naming one count
    column per (dataset label, SPAM label) pair, followed by one line per
    gate string holding the string and its counts in the same column order.

    Parameters
    ----------
    filename : string
        The filename to write.

    multidataset : MultiDataSet
        The multi data set from which counts are obtained.

    gatestring_list : list of GateStrings
        The list of gate strings to include in the written dataset.
        If None, all gate strings are output.

    spamLabelOrder : list, optional
        A list of the SPAM labels in multidataset which specifies
        the column order in the output file.

    Raises
    ------
    ValueError
        If gatestring_list is non-empty and its first element is not a
        GateString object.
    AssertionError
        If spamLabelOrder does not contain exactly the SPAM labels of
        multidataset.
    """
    if gatestring_list is not None:
        # Only the first element is type-checked.
        if len(gatestring_list) > 0 and \
                not isinstance(gatestring_list[0], _objs.GateString):
            raise ValueError(
                "Argument gatestring_list must be a list of GateString objects!")
    else:
        # TODO: make access function for gatestrings?
        gatestring_list = multidataset.gsIndex.keys()

    spamLabels = multidataset.get_spam_labels()
    if spamLabelOrder is not None:
        # spamLabelOrder must be a reordering of the existing SPAM labels.
        assert len(spamLabelOrder) == len(spamLabels)
        assert all(sl in spamLabels for sl in spamLabelOrder)
        assert all(sl in spamLabelOrder for sl in spamLabels)
        spamLabels = spamLabelOrder

    # Materialize once: the labels are iterated again for every row.
    dsLabels = list(multidataset.keys())

    headerString = '## Columns = ' + ", ".join(
        ["%s %s count" % (dsl, sl) for dsl in dsLabels for sl in spamLabels])

    # The context manager closes the file even if a row fails to format.
    with open(filename, 'w') as f:
        f.write(headerString + "\n")
        for gateString in gatestring_list:
            # gateString should be a GateString object here
            gs = gateString.tup  # gatestring tuple
            counts = ["%g" % multidataset[dsl][gs][sl]
                      for dsl in dsLabels for sl in spamLabels]
            f.write(gateString.str + " " + " ".join(counts) + "\n")
def write_dataset(filename, dataset, gatestring_list=None,
                  spamLabelOrder=None):
    """
    Write a text-formatted dataset file.

    The file starts with a "## Columns = ..." header naming one count
    column per SPAM label, followed by one line per gate string holding
    the string and its counts in the same column order.

    Parameters
    ----------
    filename : string
        The filename to write.

    dataset : DataSet
        The data set from which counts are obtained.

    gatestring_list : list of GateStrings, optional
        The list of gate strings to include in the written dataset.
        If None, all gate strings are output.

    spamLabelOrder : list, optional
        A list of the SPAM labels in dataset which specifies
        the column order in the output file.

    Raises
    ------
    ValueError
        If gatestring_list is non-empty and its first element is not a
        GateString object.
    AssertionError
        If spamLabelOrder does not contain exactly the SPAM labels of
        dataset.
    """
    if gatestring_list is not None:
        # Only the first element is type-checked.
        if len(gatestring_list) > 0 and \
                not isinstance(gatestring_list[0], _objs.GateString):
            raise ValueError(
                "Argument gatestring_list must be a list of GateString objects!")
    else:
        gatestring_list = dataset.keys()

    spamLabels = dataset.get_spam_labels()
    if spamLabelOrder is not None:
        # spamLabelOrder must be a reordering of the existing SPAM labels.
        assert len(spamLabelOrder) == len(spamLabels)
        assert all(sl in spamLabels for sl in spamLabelOrder)
        assert all(sl in spamLabelOrder for sl in spamLabels)
        spamLabels = spamLabelOrder

    headerString = '## Columns = ' + ", ".join(
        ["%s count" % sl for sl in spamLabels])

    # The context manager closes the file even if a row fails to format.
    with open(filename, 'w') as f:
        f.write(headerString + "\n")
        for gateString in gatestring_list:
            # gateString should be a GateString object here
            dataRow = dataset[gateString.tup]
            counts = ["%g" % dataRow[sl] for sl in spamLabels]
            f.write(gateString.str + " " + " ".join(counts) + "\n")
def load_gatestring_dict(filename):
    """
    Read a dictionary of gate strings from a standard text-format file.

    Parameters
    ----------
    filename : string
        Path of the file to parse.

    Returns
    -------
    dict
        The result of StdInputParser.parse_dictfile: a dictionary with
        keys = gate string labels and values = GateString objects.
    """
    # Parsing is delegated entirely to a fresh standard-input parser.
    return _stdinput.StdInputParser().parse_dictfile(filename)