Esempio n. 1
0
    def process_data_to_dso(self, nmr_data, nmr_ppms, sample_labels, experiment_name):
        """Pack a set of spectra into a new DataSet.

        Args:
            nmr_data: Iterable of spectra; the n-th item is written to row n
                of ``dso.data``.
            nmr_ppms: Sized iterable of ppm values; its length is the column
                count and its items become the axis-1 labels (as ``str``) and
                scale (as ``float``).
            sample_labels: Indexable, sized collection of sample labels; its
                length is the row count and item n labels row n.
            experiment_name: Value stored as the DataSet's ``name``.

        Returns:
            The populated DataSet.
        """
        print("Processing spectra to dso...")

        # Rows are samples, columns are ppm positions.
        dso = DataSet(size=(len(sample_labels), len(nmr_ppms)))

        for row, spectrum in enumerate(nmr_data):
            label = sample_labels[row]
            print("Spectra %s" % label)
            dso.data[row, :] = spectrum
            dso.labels[0][row] = label

        # The ppm axis is stored twice: as text labels and as a numeric scale.
        dso.labels[1] = list(map(str, nmr_ppms))
        dso.scales[1] = list(map(float, nmr_ppms))
        dso.name = experiment_name

        return dso
Esempio n. 2
0
    def process_data_to_dso(self, nmr_data, nmr_ppms, sample_labels,
                            experiment_name):
        """Build a DataSet holding one spectrum per row.

        Args:
            nmr_data: Iterable of spectra; item n fills row n of ``dso.data``.
            nmr_ppms: Sized iterable of ppm values. Its length gives the
                number of columns; the values are stored as ``str`` labels
                and as a ``float`` scale on axis 1.
            sample_labels: Indexable, sized collection of labels. Its length
                gives the number of rows; item n is the label of row n.
            experiment_name: Assigned to the DataSet's ``name``.

        Returns:
            The filled DataSet.
        """
        print("Processing spectra to dso...")

        n_samples = len(sample_labels)
        n_ppms = len(nmr_ppms)
        dso = DataSet(size=(n_samples, n_ppms))

        # Copy each spectrum into its row and label that row.
        for index, spectrum in enumerate(nmr_data):
            current_label = sample_labels[index]
            print("Spectra %s" % current_label)
            dso.data[index, :] = spectrum
            dso.labels[0][index] = current_label

        # Axis 1 carries the ppm values both as text and as numbers.
        ppm_labels = [str(value) for value in nmr_ppms]
        ppm_scale = [float(value) for value in nmr_ppms]
        dso.labels[1] = ppm_labels
        dso.scales[1] = ppm_scale
        dso.name = experiment_name

        return dso
Esempio n. 3
0
    def load_bml_datafile(self, data_path, target, name):
        """Load a tab-delimited metabolite table into a DataSet.

        The file is scanned for a header row whose first cell is
        ``'metabolite'``. Every non-empty row after it is read as one
        metabolite: the first cell is its label and cells ``[1:-2]`` are
        parsed as floats (the last two columns are ignored).

        Args:
            data_path: Path of the file to read.
            target: Not used by this method; kept so the signature is
                unchanged for callers.
            name: Stored as the DataSet's ``name`` and embedded in its
                ``description``.

        Returns:
            A DataSet sized (sample columns, metabolites) whose axis-1 labels
            are the metabolite names, or ``None`` if no header row is found.

        Raises:
            ValueError: If a data cell cannot be converted to ``float``.
        """
        # Context manager guarantees the handle is closed on every exit path,
        # including the early return and a float() failure.
        with open(data_path, 'rb') as handle:
            reader = csv.reader(utils.nonull(handle),
                                delimiter='\t',
                                dialect='excel')

            # Skip everything up to and including the header row.
            for row in reader:
                if row and row[0] == 'metabolite':
                    break
            else:
                return None

            # Sample columns sit between the label column and the last two.
            # NOTE(review): earlier code derived sample names with
            # ``sample[8:-1]`` but never stored them on the DataSet; only the
            # count was used. Confirm whether labels[0] should be populated.
            sample_count = len(row[1:-2])

            metabolites = []
            raw_data = []
            for row in reader:
                if not row:
                    # Blank lines (e.g. trailing newline) carry no data.
                    continue
                metabolites.append(row[0])
                raw_data.append([float(cell) for cell in row[1:-2]])

        dso = DataSet(size=(sample_count, len(metabolites)))
        dso.labels[1] = metabolites

        # File rows are metabolites; transpose so rows are samples.
        dso.data = np.array(raw_data).T

        dso.name = name
        dso.description = 'Imported from FIMA (%s)' % name

        return dso
Esempio n. 4
0
    def load_bml_datafile(self, data_path, target, name):
        """Read a tab-delimited metabolite table and return it as a DataSet.

        Rows are consumed until one whose first cell is ``'metabolite'`` is
        found; that row is the header. Each later non-empty row describes one
        metabolite: cell 0 is its label and cells ``[1:-2]`` are converted to
        floats (the final two columns are not read).

        Args:
            data_path: Path of the file to read.
            target: Not used by this method; retained for interface
                compatibility.
            name: Stored as the DataSet's ``name`` and embedded in its
                ``description``.

        Returns:
            A DataSet sized (sample columns, metabolites) with metabolite
            names as axis-1 labels, or ``None`` when the header row is absent.

        Raises:
            ValueError: If a data cell cannot be converted to ``float``.
        """
        # Use a context manager so the file is closed even on the early
        # return or when a cell fails to parse.
        with open(data_path, 'rb') as handle:
            reader = csv.reader(utils.nonull(handle), delimiter='\t', dialect='excel')

            # Advance the reader past the header row.
            for row in reader:
                if row and row[0] == 'metabolite':
                    break
            else:
                return None

            # Number of sample columns (label column and last two excluded).
            # NOTE(review): sample names were previously trimmed with
            # ``sample[8:-1]`` but never attached to the DataSet; verify
            # whether they should be written to labels[0].
            sample_count = len(row[1:-2])

            metabolites = []
            raw_data = []
            for row in reader:
                if not row:
                    # Ignore blank lines instead of failing on row[0].
                    continue
                metabolites.append(row[0])
                raw_data.append([float(cell) for cell in row[1:-2]])

        dso = DataSet(size=(sample_count, len(metabolites)))
        dso.labels[1] = metabolites

        # Transpose: the file is metabolite-per-row, the DataSet is
        # sample-per-row.
        dso.data = np.array(raw_data).T

        dso.name = name
        dso.description = 'Imported from FIMA (%s)' % name

        return dso
Esempio n. 5
0
    def load_datafile(self, filename):
        """Import a PeakML file into a DataSet.

        Reads the sample sets from ``header/sets/set`` (set id plus the
        measurement ids obtained from ``self.decode``), then walks every
        ``peaks/peak`` element. Peaks carrying an ``identification``
        annotation contribute one column per listed identity; the intensity
        and mass of each nested ``peaks/peak`` element are recorded against
        its measurement id.

        Args:
            filename: Path of the PeakML file to parse.

        Returns:
            ``{'output': dso}`` where ``dso`` has one row per measurement and
            one column per identity occurrence.

        Raises:
            ValueError: If a peak references a measurement id that is not
                listed in any set, or an intensity/mass is not numeric.
            KeyError: If an identified peak has no nested peaks, so no mass
                was recorded for its identities.
        """
        dso = DataSet()

        # Read data in from peakml format file
        xml = et.parse(filename)

        # Sample sets: map each measurement id to the id of its set (used as
        # the class) and keep the measurement order for the row axis.
        midclass = {}
        measurements = []
        for aset in xml.iterfind('header/sets/set'):
            set_id = aset.find('id').text
            mids = aset.find('measurementids').text
            for mid in self.decode(mids):
                midclass[mid] = set_id
                measurements.append(mid)

        # Peaks: buffer intensities per measurement until the full identity
        # list is known.
        quantities = defaultdict(dict)
        masses = {}
        all_identities = []

        for peakset in xml.iterfind('peaks/peak'):
            # Find metabolite identities
            identities = None
            for annotation in peakset.iterfind('annotations/annotation'):
                if annotation.find('label').text == 'identification':
                    identities = annotation.find('value').text.split(', ')
                    all_identities.extend(identities)
                    break

            if not identities:
                continue

            # PeakML supports multiple alternative identities per peak; each
            # one receives the same intensity and mass.
            for chromatogram in peakset.iterfind('peaks/peak'):  # Next level down
                mid = chromatogram.find('measurementid').text
                intensity = float(chromatogram.find('intensity').text)
                mass = float(chromatogram.find('mass').text)

                for identity in identities:
                    quantities[mid][identity] = intensity
                    masses[identity] = mass

        # Quantities table built; class table built; now rearrange into dso
        dso.empty((len(measurements), len(all_identities)))
        dso.labels[0] = measurements
        dso.classes[0] = [midclass[mid] for mid in measurements]

        dso.labels[1] = all_identities
        db_hmdbids = self.m.db.unification['HMDB']
        dso.entities[1] = [
            db_hmdbids[hmdbid] if hmdbid in db_hmdbids else None
            for hmdbid in all_identities
        ]
        dso.scales[1] = [float(masses[i]) for i in all_identities]

        # Column lookup built once instead of list.index() per cell.
        # setdefault keeps the first occurrence, matching list.index() when
        # an identity is listed by more than one peak.
        column_of = {}
        for column, identity in enumerate(all_identities):
            column_of.setdefault(identity, column)

        for mid, intensities in quantities.items():
            # Row is the same for every identity of this measurement.
            r = measurements.index(mid)
            for identity, intensity in intensities.items():
                dso.data[r, column_of[identity]] = intensity

        dso.name = os.path.basename(filename)
        dso.description = 'Imported PeakML file'
        self.set_name(dso.name)

        return {'output': dso}
Esempio n. 6
0
    def load_datafile(self, filename):
        """Parse a PeakML file and return its contents as a DataSet.

        Sample sets are read from ``header/sets/set``: each set id is used as
        the class of the measurement ids produced by ``self.decode``. Every
        ``peaks/peak`` element with an ``identification`` annotation then adds
        one column per listed identity, and the intensity and mass of each
        nested ``peaks/peak`` element are stored against its measurement id.

        Args:
            filename: Path of the PeakML file to parse.

        Returns:
            ``{'output': dso}`` where ``dso`` has one row per measurement and
            one column per identity occurrence.

        Raises:
            ValueError: If a peak references a measurement id that is not
                listed in any set, or an intensity/mass is not numeric.
            KeyError: If an identified peak has no nested peaks, so no mass
                was recorded for its identities.
        """
        dso = DataSet()

        # Read data in from peakml format file
        xml = et.parse(filename)

        # Get sample ids and class groupings; measurement order defines the
        # row order of the DataSet.
        midclass = {}
        measurements = []
        for aset in xml.iterfind('header/sets/set'):
            set_id = aset.find('id').text
            mids = aset.find('measurementids').text
            for mid in self.decode(mids):
                midclass[mid] = set_id
                measurements.append(mid)

        # Parse the intensity and identity info, buffering per measurement
        # until the complete identity list is available.
        quantities = defaultdict(dict)
        masses = {}
        all_identities = []

        for peakset in xml.iterfind('peaks/peak'):
            # Find metabolite identities
            identities = None
            for annotation in peakset.iterfind('annotations/annotation'):
                if annotation.find('label').text == 'identification':
                    identities = annotation.find('value').text.split(', ')
                    all_identities.extend(identities)
                    break

            if not identities:
                continue

            # PeakML allows several alternative identities for one peak; the
            # same intensity and mass are written for each of them.
            for chromatogram in peakset.iterfind('peaks/peak'):  # Next level down
                mid = chromatogram.find('measurementid').text
                intensity = float(chromatogram.find('intensity').text)
                mass = float(chromatogram.find('mass').text)

                for identity in identities:
                    quantities[mid][identity] = intensity
                    masses[identity] = mass

        # Quantities table built; class table built; now rearrange into dso
        dso.empty((len(measurements), len(all_identities)))
        dso.labels[0] = measurements
        dso.classes[0] = [midclass[mid] for mid in measurements]

        dso.labels[1] = all_identities
        db_hmdbids = self.m.db.unification['HMDB']
        dso.entities[1] = [db_hmdbids[hmdbid] if hmdbid in db_hmdbids else None for hmdbid in all_identities]
        dso.scales[1] = [float(masses[i]) for i in all_identities]

        # Precompute identity -> column once; list.index() per cell made the
        # fill loop quadratic. setdefault preserves list.index() semantics
        # (first occurrence wins) when an identity appears more than once.
        column_of = {}
        for column, identity in enumerate(all_identities):
            column_of.setdefault(identity, column)

        for mid, intensities in quantities.items():
            # The row depends only on the measurement, so look it up once.
            r = measurements.index(mid)
            for identity, intensity in intensities.items():
                dso.data[r, column_of[identity]] = intensity

        dso.name = os.path.basename(filename)
        dso.description = 'Imported PeakML file'
        self.change_name.emit(dso.name)

        return {'output': dso}