Example #1
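The snippet relies on csv, logging, and OrderedDict from the standard library, plus several project-local names (entitydata, EntityData, findSbmlEntity, STANDARD_DEVIATION). A minimal sketch of the imports it assumes; the project module paths are guesses, not confirmed:

import csv
import logging
from collections import OrderedDict

# Project-local names used below; these module paths are hypothetical:
# from datamanagement import entitydata
# from datamanagement.entitydata import EntityData
# from sbmlhelpers import findSbmlEntity
# STANDARD_DEVIATION = "sd"  # hypothetical: marker substring for weight column headers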
def read_csv_data(filename, dataSet=None, parkinController=None):
    '''
    Parses a CSV (comma separated values) file with experimental data.

    The file must have a header line. Fields may be enclosed in double quotes
    (but they don't have to be). Values are plain numbers, e.g. "3.14".

    Headers follow this convention (a sample layout is sketched in the
    comment below this docstring):
        - First column called C{"Timepoint [%UNIT%]"}, e.g. C{"Timepoint [s]"}
        - Second column called C{"Unit"} for the time unit.
        - One column per Species ID, with the unit given in square brackets, e.g. C{"LH [mg/ml]"}
        - Optionally: one column per Species Weight called "Weight" *immediately* after the ID column.
        - All other columns are treated as metadata, so there can be e.g. a "patient id" column.
          The data of such a column is put into its own EntityData object with isMetaData = True.
    '''
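    # A matching tab-delimited input file might look like this (hypothetical
    # data; the "SD" column is a weight column, i.e. its header contains the
    # STANDARD_DEVIATION marker and immediately follows its Species column):
    #
    #   Timepoint [s]   LH [mg/ml]   SD   Patient ID
    #   0.0             3.14         0.1  P01
    #   1.0             2.71         0.2  P01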
    try:
        csv.register_dialect("ExperimentalDataCSV",
                             delimiter='\t',
                             quotechar='"',
                             skipinitialspace=True)
        reader = csv.reader(open(filename), dialect="ExperimentalDataCSV")

        descriptors = []
        descriptorUnit = ""

        columnIDs = []
        columnIDToUnit = {}
        columnIDHasWeightColumn = {}
        columnIDsToValues = {}
        columnIndexToColumnID = {}

        idToName = {}

        originalHeaders = {}

        columnIndexesWithWeightData = []

        columnIndexesWithMetaData = []  # for columns with "non-critical" information
        columnIDsWithMetaData = []

        previousId = None
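        # previousId tracks the most recently seen Species column so that an
        # immediately following weight (SD) column can be tied back to it.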

        # getting data from file and preparing it for a DataSet
        for (i, row) in enumerate(reader):
            if i == 0:  # in header row
                for (j, header) in enumerate(row):
                    if j == 0:  # timepoint header
                        originalDescriptorHeader = header
                        if not "[" in header:
                            descriptorUnit = "N/A"
                        else:
                            descriptorUnit = header.split("[")[1].rsplit(
                                "]")[0].strip()
                    else:  # all non-timepoint headers
                        if "[" in header:  # the current cell is a Species definition
                            splitID = header.split("[")
                            id = splitID[0].strip()
                            unit = splitID[1].strip()[:-1]
                            columnIDs.append(id)
                            columnIDToUnit[id] = unit
                            columnIDsToValues[id] = []
                            columnIndexToColumnID[j] = id
                            originalHeaders[id] = header
                            previousId = id
                        elif previousId and STANDARD_DEVIATION in header.lower():  # if in weight-defining cell
                            # radical change: handle weight columns as "normal" data, save them in separate EntityData
                            columnIDHasWeightColumn[previousId] = True  # will be used later on
                            columnIndexesWithWeightData.append(j)

                            id = "weight_%s" % previousId
                            columnIDs.append(id)
                            columnIDsToValues[id] = []
                            columnIndexToColumnID[j] = id
                            idToName[id] = "SD"
                            originalHeaders[id] = header

                            previousId = None
                        else:  # in non-defined arbitrary cell, e.g. "Patient ID"
                            columnIndexesWithMetaData.append(j)
                            columnIDsWithMetaData.append(header)
                            columnIDs.append(header)
                            columnIDsToValues[header] = []
                            columnIndexToColumnID[j] = header
                            originalHeaders[header] = header
                            previousId = None
                            logging.debug(
                                "DataHandling.read_csv_data(): Reading 'meta data' of column %s"
                                % header)
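
            # The data-row pass (else branch below) relies on the lookup tables
            # built above: columnIndexToColumnID maps a column index to its
            # internal ID, and columnIndexesWithWeightData marks the weight
            # (SD) columns.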

            else:  # for all non-header rows
                for (j, value) in enumerate(row):
                    if j == 0:  # data descriptor column (e.g. timepoints)
                        descriptors.append(value)
                    elif j in columnIndexesWithMetaData:  # handling "non-species" columns
                        id = columnIndexToColumnID[j]
                        columnIDsToValues[id].append(value)
                        previousId = None
                    elif j in columnIndexesWithWeightData:  # weight (SD) cell
                        # weight columns were registered above with their own
                        # "weight_*" IDs, so they are stored like data columns.
                        # (Keying on previousId here was unreliable: it could
                        # still hold a leftover ID from the header pass.)
                        columnIDsToValues[columnIndexToColumnID[j]].append(value)
                    else:
                        id = columnIndexToColumnID[j]
                        columnIDsToValues[id].append(value)

        # loop through the collected data and feed it into an OrderedDict with
        # EntityData objects as values. This is the internal data structure of
        # a DataSet object.

        dataSet.dataDescriptors = descriptors  # e.g. global timepoint list
        dataSet.dataDescriptorUnit = descriptorUnit
        dataSet.descriptorHeader = originalDescriptorHeader

        lastEntityData = None
        entityMap = OrderedDict()  # keep Species order
        for j, columnID in enumerate(columnIDs):
            logging.debug("About to save data for column %s" % columnID)

            entityData = EntityData()

            entityData.dataDescriptorUnit = descriptorUnit
            entityData.datapointUnit = columnIDToUnit.get(columnID, None)
            entityData.setType(entitydata.TYPE_EXPERIMENTAL)
            entityData.setAssociatedDataSet(dataSet)  # always use this method, so SIGNALs are correctly connected!
            entityData.originalId = columnID  # can be used as an ID fallback (e.g. for plotting) later
            entityData.originalHeader = originalHeaders[columnID]

            if columnID in idToName:
                entityData.setName(idToName[columnID])

            if columnID in columnIDsWithMetaData:
                entityData.isMetaData = True
            if j in columnIndexesWithWeightData:
                entityData.isWeightData = True
                entityData.setWeightData(lastEntityData)

            values = columnIDsToValues[columnID]
            if not values:
                logging.error("DataHandling.read_csv_data(): No values for column %s" % columnID)
                continue

            for i, value in enumerate(values):
                try:
                    value = float(value)
                except ValueError:  # non-numeric cell becomes a missing value
                    value = None

                timepoint = descriptors[i]
                entityData.dataDescriptors.append(timepoint)

                entityData.datapoints.append(value)

            # try to associate a Species in the currently loaded model (if any) with the current Species ID
            if (columnID not in columnIDsWithMetaData and parkinController
                    and parkinController.ActiveModelController
                    and parkinController.ActiveModelController.sbmlModel):
                entityData.sbmlEntity = findSbmlEntity(
                    columnID, parkinController.ActiveModelController.sbmlModel)

            if entityData.sbmlEntity:
                entityMap[entityData.sbmlEntity] = entityData
            else:
                entityMap[columnID] = entityData

            lastEntityData = entityData

        return entityMap

    except Exception as e:
        logging.error(
            "DataHandling.read_csv_data(): Error while reading file %s:\n%s" %
            (filename, e))
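
A minimal usage sketch, assuming a DataSet container class with the attributes assigned above and a tab-delimited file named "experiment.csv" (both names are illustrative assumptions, not part of the snippet):

import logging
logging.basicConfig(level=logging.DEBUG)

dataSet = DataSet()  # hypothetical container; read_csv_data assigns attributes on it
entityMap = read_csv_data("experiment.csv", dataSet=dataSet)  # no model controller: SBML lookup is skipped
if entityMap:
    for key, entity in entityMap.items():
        print("%s: %s" % (key, entity.datapoints))

Note that passing dataSet=None (the default) makes the attribute assignments fail inside the try block; the broad except then only logs the error and the function implicitly returns None, so a DataSet instance is effectively required.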
Example #2
                        entity = entitydata.EntityData()
                        if dataSet:
                            entity.setAssociatedDataSet(dataSet)
                        entityMap[dataID] = entity
                    entity.dataDescriptors.append(time)
                    entity.datapoints.append(value)
    except Exception as e:
        logging.error("Could not read file %s. Error: %s" % (filename, e))
        return

    for (id, entity) in entityMap.items():
        entity.originalId = id
        entity.originalFilename = filename
        if (parkinController and parkinController.ActiveModelController
                and parkinController.ActiveModelController.sbmlModel):
            #entity.sbmlEntity = helpers.sbmlhelpers.findSbmlEntity(id, parkinController.ActiveModelController.sbmlModel)
            entity.sbmlEntity = findSbmlEntity(id, parkinController.ActiveModelController.sbmlModel)

    return entityMap

