Example #1
def read_integrated_data(filename, listOfEntities, dataSet=None):
    """
    DEPRECATED! Just for reminiscing about the past... :)

    Reads the results of a Fortran-BioParkin run.

    The list of SBML entities is used to associate columns in the entityData file
    with their corresponding entityData entities.

    format:
    C{time [WHITESPACE] par 1 [WHITESPACE] ... [WHITESPACE] par n}

    e.g.:
    C{0.0000000000000000E+00     1.4999999999999999E-02    0.0000000000000000E+00}

    """

    try:
        csv.register_dialect("SimulationData", delimiter=' ', quotechar='"', skipinitialspace=True)
        reader = csv.reader(open(filename), dialect="SimulationData")
        time = []
        values = {}
        for (i, row) in enumerate(reader):  # we don't use the i... yet

            for (j, value) in enumerate(row):
                if j == 0:  # the 0th column always holds the time value
                    time.append(value)
                    continue

                columnID = j - 1  # shift by one because column 0 holds the time values
                entity = listOfEntities[columnID]
                if entity in values:
                    values[entity].append(value) # append to existing list
                else:
                    values[entity] = [value] # create list for the first time

        # create final data structure
        dataMap = {}
        if len(values) > 0: # if we have data
            for entity in values.keys():
                entityData = EntityData()
                if dataSet:
                    entityData.setAssociatedDataSet(dataSet)
                entityData.dataDescriptors = time
                entityData.datapoints = values[entity]
                entityData.type = entitydata.TYPE_SIMULATED
                entityData.sbmlEntity = entity.wrappedEntity
                entityData.originalFilename = filename
                dataMap[entity] = entityData
            return dataMap

    except Exception as e:
        logging.error("Error while trying to read simulation dataMap file: %s" % e)
Example #2
def read_csv_data(filename, dataSet=None, parkinController=None):
    '''
    Parses a CSV (comma separated values) file with experimental data.

    The file has to have a header line. Fields may be enclosed in " (but they
    don't have to be); values are plain numbers, e.g. "3.14". Note that the
    dialect registered below uses tab characters as the delimiter.

    Headers follow this convention:
        - The first column is called C{"Timepoint [%UNIT%]"}, e.g. C{"Timepoint [s]"}.
        - The second column, called C{"Unit"}, gives the time unit.
        - There is one column per Species ID, with the unit given in square brackets, e.g. C{"LH [mg/ml]"}.
        - Optionally: one Species Weight column called "Weight" *immediately* after the ID column.
        - All other columns are treated as a kind of "meta data"; there can be e.g. a "patient id" column.
          The data of such a column is put into its own EntityData object with isMetaData = True.
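
    A hypothetical header row following this convention (tab-separated):
    C{Timepoint [s]    Unit    LH [mg/ml]    Weight    Patient ID}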
    '''
    try:
        csv.register_dialect("ExperimentalDataCSV",
                             delimiter='\t',
                             quotechar='"',
                             skipinitialspace=True)
        reader = csv.reader(open(filename), dialect="ExperimentalDataCSV")

        descriptors = []
        descriptorUnit = ""

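        # Per-column bookkeeping: columnIDs keeps the column order, columnIDToUnit maps a
        # column ID to its unit, columnIDHasWeightColumn flags IDs that are followed by a
        # weight column, columnIDsToValues collects the raw string values per ID, and
        # columnIndexToColumnID maps a CSV column index back to its ID.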
        columnIDs = []
        columnIDToUnit = {}
        columnIDHasWeightColumn = {}
        columnIDsToValues = {}
        columnIndexToColumnID = {}

        idToName = {}

        originalHeaders = {}

        columnIndexesWithWeightData = []

        columnIndexesWithMetaData = [
        ]  # for columns with "non-critical" information
        columnIDsWithMetaData = []

        previousId = None

        # getting data from file and preparing it for a DataSet
        for (i, row) in enumerate(reader):
            if i == 0:  # in header row
                for (j, header) in enumerate(row):
                    if j == 0:  # timepoint header
                        originalDescriptorHeader = header
                        if not "[" in header:
                            descriptorUnit = "N/A"
                        else:
                            descriptorUnit = header.split("[")[1].rsplit(
                                "]")[0].strip()
                    else:  # all non-timepoint headers
                        if "[" in header:  # the current cell is a Species definition
                            splitID = header.split("[")
                            id = splitID[0].strip()
                            unit = splitID[1].strip()[:-1]
                            columnIDs.append(id)
                            columnIDToUnit[id] = unit
                            columnIDsToValues[id] = []
                            columnIndexToColumnID[j] = id
                            originalHeaders[id] = header
                            previousId = id
                        elif previousId and STANDARD_DEVIATION in header.lower(
                        ):  # if in weight-defining cell
                            # radical change: handle weight columns as "normal" data, save them in separate EntityData
                            columnIDHasWeightColumn[
                                previousId] = True  # will be used later on
                            columnIndexesWithWeightData.append(j)

                            id = "weight_%s" % previousId
                            columnIDs.append(id)
                            columnIDsToValues[id] = []
                            columnIndexToColumnID[j] = id
                            idToName[id] = "SD"
                            originalHeaders[id] = header

                            previousId = None
                        else:  # in non-defined arbitrary cell, e.g. "Patient ID"
                            columnIndexesWithMetaData.append(j)
                            columnIDsWithMetaData.append(header)
                            columnIDs.append(header)
                            columnIDsToValues[header] = []
                            columnIndexToColumnID[j] = header
                            originalHeaders[header] = header
                            previousId = None
                            logging.debug(
                                "DataHandling.read_csv_data(): Reading 'meta data' of column %s"
                                % header)

            else:  # for all non-header rows
                for (j, value) in enumerate(row):
                    if j == 0:  # data descriptor column (e.g. timepoints)
                        descriptors.append(value)
                    elif j in columnIndexesWithMetaData:  # handling "non-species" columns
                        id = columnIndexToColumnID[j]
                        columnIDsToValues[id].append(value)
                        previousId = None
                    elif columnIDHasWeightColumn.get(
                            previousId,
                            False):  # this is a weight-defining cell
                        columnIDsToValues[previousId].append(value)
                        previousId = None
                    else:
                        id = columnIndexToColumnID[j]
                        columnIDsToValues[id].append(value)

        # Loop through the collected data and feed it into an OrderedDict with EntityData
        # objects as values. This is the internal data structure of a DataSet object.

        dataSet.dataDescriptors = descriptors  # e.g. global timepoint list
        dataSet.dataDescriptorUnit = descriptorUnit
        dataSet.descriptorHeader = originalDescriptorHeader

        lastEntityData = None
        entityMap = OrderedDict()  # keep Species order
        for j, columnID in enumerate(columnIDs):
            logging.debug("About to save data for column %s" % columnID)

            entityData = EntityData()

            entityData.dataDescriptorUnit = descriptorUnit
            entityData.datapointUnit = columnIDToUnit.get(columnID, None)
            entityData.setType(entitydata.TYPE_EXPERIMENTAL)
            entityData.setAssociatedDataSet(
                dataSet
            )  # always use this method, so SIGNALs are correctly connected!
            entityData.originalId = columnID  # can be used as an ID fallback (e.g. for plotting) later
            entityData.originalHeader = originalHeaders[columnID]

            if columnID in idToName:
                entityData.setName(idToName[columnID])

            if columnID in columnIDsWithMetaData:
                entityData.isMetaData = True
            if j in columnIndexesWithWeightData:
                entityData.isWeightData = True
                entityData.setWeightData(lastEntityData)

            values = columnIDsToValues[columnID]
            if not values:
                logging.error(
                    "DataHandling.read_csv_data(): No values for column %s" %
                    columnID)
                continue

            for i, value in enumerate(values):
                try:
                    value = float(value)
                except ValueError:  # non-numeric cell, e.g. an empty string
                    value = None

                timepoint = descriptors[i]
                entityData.dataDescriptors.append(timepoint)

                entityData.datapoints.append(value)

            # try to associate a Species in the currently loaded model (if any) with the current Species ID
            if (columnID not in columnIDsWithMetaData and parkinController
                    and parkinController.ActiveModelController
                    and parkinController.ActiveModelController.sbmlModel):
                entityData.sbmlEntity = findSbmlEntity(
                    columnID, parkinController.ActiveModelController.sbmlModel)

            if entityData.sbmlEntity:
                entityMap[entityData.sbmlEntity] = entityData
            else:
                entityMap[columnID] = entityData

            lastEntityData = entityData

        return entityMap

    except Exception as e:
        logging.error(
            "DataHandling.read_csv_data(): Error while reading file %s:\n%s" %
            (filename, e))
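
A minimal, hypothetical usage sketch for the CSV reader above. It assumes the surrounding BioParkin module (EntityData, entitydata, findSbmlEntity) is importable; the DataSet stand-in and the file name are made up, and no parkinController is passed, so no SBML association is attempted.

class HypotheticalDataSet(object):
    # Stand-in exposing just the attributes that read_csv_data() assigns to.
    def __init__(self):
        self.dataDescriptors = []
        self.dataDescriptorUnit = ""
        self.descriptorHeader = ""

dataSet = HypotheticalDataSet()
entityMap = read_csv_data("experimental_data.csv", dataSet=dataSet)
if entityMap:
    for key, entityData in entityMap.items():
        print("%s: %s" % (key, entityData.datapoints))
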
Example #3
def read_integrated_data(filename, listOfEntities, dataSet=None):
    '''
    Reads the results of a Fortran-BioParkin run.
    
    The list of SBML entities is used to associate columns in the entityData file
    with their corresponding entityData entities.
        
    format:
    C{time [WHITESPACE] par 1 [WHITESPACE] ... [WHITESPACE] par n}
    
    e.g.:
    C{0.0000000000000000E+00     1.4999999999999999E-02    0.0000000000000000E+00}
    
    '''

    try:
        csv.register_dialect("SimulationData",
                             delimiter=' ',
                             quotechar='"',
                             skipinitialspace=True)
        reader = csv.reader(open(filename), dialect="SimulationData")
        time = []
        values = {}
        for (i, row) in enumerate(reader):  # we don't use the i... yet

            for (j, value) in enumerate(row):
                if j == 0:  # the 0th column always holds the time value
                    time.append(value)
                    continue

                columnID = j - 1  # shift by one because column 0 holds the time values
                entity = listOfEntities[columnID]
                if entity in values:
                    values[entity].append(value)  # append to existing list
                else:
                    values[entity] = [value]  # create list for the first time

        # create final data structure
        dataMap = {}
        if len(values) > 0:  # if we have data
            for entity in values.keys():
                entityData = EntityData()
                if dataSet:
                    entityData.setAssociatedDataSet(dataSet)
                entityData.dataDescriptors = time
                entityData.datapoints = values[entity]
                entityData.type = entitydata.TYPE_SIMULATED
                entityData.sbmlEntity = entity.wrappedEntity
                entityData.originalFilename = filename
                dataMap[entity] = entityData
            return dataMap

    except Exception as e:
        logging.error(
            "Error while trying to read simulation dataMap file: %s" % e)


#def read_integrated_data_old(filename):
#    '''
#    OLD VERSION, DON'T USE!
#    Only for looking up old code!
#
#    Reads the results of a Fortran-BioParkin run.
#
#    input format, e.g. see:
#    /home/bzfwadem/workspace/BioParkin/compute/temp/P_gnrh_rec4_Solution.dat
#
#    format:
#    time \t par 1 \t par n
#
#    '''
#    data = {}
#
#    #with csv.reader(open(filename), delimiter=' ', quotechar='"') as reader:
#    try:
#        csv.register_dialect("SimulationData", delimiter=' ', quotechar='"', skipinitialspace=True)
#        reader = csv.reader(open(filename), dialect="SimulationData")
#        time = []
#        values = {}
#        for (i, row) in enumerate(reader):  # we don't use the i... yet
#
#            for (j, value) in enumerate(row):
#                if j == 0:  # jump over the 0th row
#                    time.append(value) # time value is always in the 0th row
#                    continue
#
#                columnID = j # possibility to compensate for column offset
#                # we will use the column index as the key
#                # TODO: We need something better than the column index. Column indices should be matched to previously loaded experimental data (or parts of the SBML model?)
#                if values.has_key(columnID):
#                    values[columnID].append(value) # append to existing list
#                else:
#                    values[columnID] = [value] # create list for the first time
#
#        # create final data structure
#        if len(values) > 0: # if we have data
#            for dataID in values.keys():
#                #if not data.has_key(dataID):
#                entry = {services.dataservice.TIME: time,
#                         services.dataservice.VALUE: values[dataID]}
#                data[str(dataID)] = entry   #str: because the experimental data's keys are strings
#                #else:
#                #    entry = data[dataID]
#            #                entry[Services.DataService.TIME] = time
#            #                entry[Services.DataService.VALUE].append(value)
#
#    except Exception, e:
#        logging.error("Error while trying to read simulation data file: %s" % e)
#    #    finally:
#    #        reader.close() # does not exist and does not seem to be necessary
#
#
#    #
#    #    dictReader = csv.DictReader(open(filename), ["time", range(nrCols)-1])
#    #    unformattedData = dictReader.
#
#    # TODO
#    #    for row in reader:
#    #        #time = float(row[0])
#    #        for (i, value) in row.enumerate_stripped():
#    #            if i == 0:
#    #                time = float(row[0])
#    #            else:
#
#    return data