Esempio n. 1
0
def load_multidataset(filename, cache=False):
    """
    Load a MultiDataSet from a file.

    Loading is attempted in this order:

    1. `filename` is loaded as a saved MultiDataSet object.
    2. If that fails and `cache` is True, the cache file (see below) is
       loaded, provided it exists and is newer than `filename`.
    3. Otherwise `filename` is parsed as a standard text-formatted
       MultiDataSet.

    Parameters
    ----------
    filename : string
        The name of the file

    cache : bool, optional
        When set to True, a file with the name filename + ".cache"
        is searched for and loaded instead of parsing filename if it
        exists and is newer than filename.  If no usable cache file
        exists (missing, older than filename, or failing to load), a
        cache file will be written after parsing filename.

    Returns
    -------
    MultiDataSet
    """
    # A saved MultiDataSet object is ok.  This is a best-effort probe, so any
    # ordinary error just means "not a saved object"; `except Exception`
    # (rather than a bare except) lets KeyboardInterrupt/SystemExit through.
    try:
        return _objs.MultiDataSet(fileToLoadFrom=filename)
    except Exception:
        pass

    cache_filename = filename + ".cache"
    if cache:
        cache_is_fresh = _os.path.exists(cache_filename) and \
            _os.path.getmtime(filename) < _os.path.getmtime(cache_filename)
        if cache_is_fresh:
            try:
                # Single-argument print(...) emits the same text under both
                # Python 2 and Python 3.
                print("Loading from cache file:  %s" % cache_filename)
                return _objs.MultiDataSet(fileToLoadFrom=cache_filename)
            except Exception:
                # Unreadable cache: fall back to parsing and rewrite it below.
                print("Failed to load from cache file")
        else:
            print("Cache file not found or is too old -- one will be created after loading is completed")

    # Otherwise must use standard dataset file format.
    mds = _stdinput.StdInputParser().parse_multidatafile(filename)

    if cache:
        print("Writing cache file (to speed future loads):  %s" % cache_filename)
        mds.save(cache_filename)
    return mds
Esempio n. 2
0
def load_gatestring_list(filename, readRawStrings=False):
    """
    Load a gate string list from a file, formatted
    using the standard text-format.

    Parameters
    ----------
    filename : string
        The name of the file

    readRawStrings : boolean
        If True, gate strings are not converted to tuples of gate
        labels; each non-blank, non-comment line is returned as a
        whitespace-stripped string instead.

    Returns
    -------
    list
        When readRawStrings is True, a list of strings.  Otherwise the
        result of StdInputParser.parse_stringfile (a list of GateString
        objects).
    """
    if not readRawStrings:
        return _stdinput.StdInputParser().parse_stringfile(filename)

    rawList = []
    # `with` guarantees the file handle is closed, even if reading fails.
    with open(filename, 'r') as f:
        for line in f:
            stripped = line.strip()
            # Skip blank lines and comment lines.  Only a '#' in the very
            # first column marks a comment; an indented '#' line is kept.
            if not stripped or line[0] == '#':
                continue
            rawList.append(stripped)
    return rawList
Esempio n. 3
0
def write_multidataset(filename,
                       multidataset,
                       gatestring_list=None,
                       spamLabelOrder=None):
    """
    Write a text-formatted multi-dataset file.

    The file begins with a "## Columns = ..." header naming one count
    column per (dataset label, SPAM label) pair, followed by one line per
    gate string containing the string and its counts, separated by two
    spaces.

    Parameters
    ----------
    filename : string
        The filename to write.

    multidataset : MultiDataSet
        The multi data set from which counts are obtained.

    gatestring_list : list of GateStrings
        The list of gate strings to include in the written dataset.
        If None, all gate strings are output.

    spamLabelOrder : list, optional
        A list of the SPAM labels in multidataset which specifies
        the column order in the output file.

    Raises
    ------
    ValueError
        If gatestring_list is non-empty and its first element is not a
        GateString object.
    AssertionError
        If spamLabelOrder is given but does not contain exactly the SPAM
        labels of multidataset.
    """
    if gatestring_list is None:
        gatestring_list = multidataset.gsIndex.keys()  #TODO: make access function for gatestrings?
    elif len(gatestring_list) > 0 and not isinstance(gatestring_list[0],
                                                     _objs.GateString):
        raise ValueError(
            "Argument gatestring_list must be a list of GateString objects!"
        )

    spamLabels = multidataset.get_spam_labels()
    if spamLabelOrder is not None:
        # Kept as asserts so callers see the same exception type as before.
        assert (len(spamLabelOrder) == len(spamLabels))
        assert (all([sl in spamLabels for sl in spamLabelOrder]))
        assert (all([sl in spamLabelOrder for sl in spamLabels]))
        spamLabels = spamLabelOrder

    # One output column per (dataset label, SPAM label) pair; computed once
    # and reused for the header and for every data row.
    columns = [(dsl, sl) for dsl in multidataset.keys() for sl in spamLabels]
    headerString = '## Columns = ' + ", ".join(
        ["%s %s count" % col for col in columns])

    # `with` closes the file even if a count lookup below raises.
    with open(filename, 'w') as f:
        f.write(headerString + "\n")
        for gateString in gatestring_list:  #gateString should be a GateString object here
            gs = gateString.tup  #gatestring tuple
            counts = ["%g" % multidataset[dsl][gs][sl] for dsl, sl in columns]
            f.write(gateString.str + "  " + "  ".join(counts) + "\n")
Esempio n. 4
0
def write_dataset(filename,
                  dataset,
                  gatestring_list=None,
                  spamLabelOrder=None):
    """
    Write a text-formatted dataset file.

    The file begins with a "## Columns = ..." header naming one count
    column per SPAM label, followed by one line per gate string containing
    the string and its counts, separated by two spaces.

    Parameters
    ----------
    filename : string
        The filename to write.

    dataset : DataSet
        The data set from which counts are obtained.

    gatestring_list : list of GateStrings, optional
        The list of gate strings to include in the written dataset.
        If None, all gate strings are output.

    spamLabelOrder : list, optional
        A list of the SPAM labels in dataset which specifies
        the column order in the output file.

    Raises
    ------
    ValueError
        If gatestring_list is non-empty and its first element is not a
        GateString object.
    AssertionError
        If spamLabelOrder is given but does not contain exactly the SPAM
        labels of dataset.
    """
    if gatestring_list is None:
        gatestring_list = dataset.keys()
    elif len(gatestring_list) > 0 and not isinstance(gatestring_list[0],
                                                     _objs.GateString):
        raise ValueError(
            "Argument gatestring_list must be a list of GateString objects!"
        )

    spamLabels = dataset.get_spam_labels()
    if spamLabelOrder is not None:
        # Kept as asserts so callers see the same exception type as before.
        assert (len(spamLabelOrder) == len(spamLabels))
        assert (all([sl in spamLabels for sl in spamLabelOrder]))
        assert (all([sl in spamLabelOrder for sl in spamLabels]))
        spamLabels = spamLabelOrder

    headerString = '## Columns = ' + ", ".join(
        ["%s count" % sl for sl in spamLabels])

    # `with` closes the file even if a count lookup below raises.
    with open(filename, 'w') as f:
        f.write(headerString + "\n")
        for gateString in gatestring_list:  #gateString should be a GateString object here
            dataRow = dataset[gateString.tup]
            counts = ["%g" % dataRow[sl] for sl in spamLabels]
            f.write(gateString.str + "  " + "  ".join(counts) + "\n")
Esempio n. 5
0
def load_gatestring_dict(filename):
    """
    Read a gate string dictionary from a standard text-format file.

    The file is handed to a freshly constructed StdInputParser, and the
    result of its parse_dictfile method is returned unchanged.

    Parameters
    ----------
    filename : string
        Path of the dictionary file to read.

    Returns
    -------
    Dictionary mapping gate string labels (keys) to
      GateString objects (values).
    """
    return _stdinput.StdInputParser().parse_dictfile(filename)