Esempio n. 1
0
    def fetch_raw_replicate_counts(self, tids):
        """Sum raw replicate lanes per transcript and store them as RawReactivities.

        Despite the name, this method reads and writes: it loads every
        RawReplicateCounts lane of this run for the given transcripts, adds
        the lanes together position by position (separately for each
        ``minusplus_id``), adds one RawReactivities row per transcript to the
        session and commits.

        If a transcript only has "minus" lanes or only has "plus" lanes, the
        missing side is stored as zeros of the same length as the present one.

        Args:
            tids: transcript IDs whose replicate lanes should be summed.

        Raises:
            IndexError: if a lane holds more values than the first lane seen
                for the same transcript and ``minusplus_id``.
            KeyError: if a transcript has lanes but none of them is labelled
                "minus" or "plus".
        """
        # fetch raw replicate lanes data
        lanes = (
            db_session.query(RawReplicateCounts)
            .filter(
                RawReplicateCounts.nucleotide_measurement_run_id == self.nucleotide_measurement_run_id,
                RawReplicateCounts.transcript_id.in_(tids),
            )
            .order_by(
                RawReplicateCounts.transcript_id,
                RawReplicateCounts.minusplus_id,
                RawReplicateCounts.bio_replicate_id,
                RawReplicateCounts.tech_replicate_id,
            )
            .all()
        )

        # transcript_id => {minusplus_id => summed per-position counts}
        counts = {}
        for lane in lanes:
            lane_values = values_str_unpack_float(lane.values)
            by_minusplus = counts.setdefault(lane.transcript_id, {})

            if lane.minusplus_id not in by_minusplus:
                # the first lane seeds the running totals for this label
                by_minusplus[lane.minusplus_id] = lane_values
                continue

            # add to the existing totals in place
            totals = by_minusplus[lane.minusplus_id]
            for pos, value in enumerate(lane_values):
                totals[pos] += value

        # insert the counts into the DB
        for transcript_id, transcript_counts in counts.items():
            # handle missing data gracefully: zero-fill the absent side
            if "minus" in transcript_counts:
                minus_counts = transcript_counts["minus"]
            else:
                minus_counts = [0] * len(transcript_counts["plus"])

            if "plus" in transcript_counts:
                plus_counts = transcript_counts["plus"]
            else:
                plus_counts = [0] * len(transcript_counts["minus"])

            measurement_set = RawReactivities(
                nucleotide_measurement_run_id=self.nucleotide_measurement_run_id,
                transcript_id=transcript_id,
                minus_values="\t".join(map(str, minus_counts)),
                plus_values="\t".join(map(str, plus_counts)),
            )
            db_session.add(measurement_set)
        db_session.commit()
Esempio n. 2
0
    def fetch_raw_replicate_counts(self, tids):
        """Sum raw replicate lanes per transcript and store them as RawReactivities.

        Despite the name, this method reads and writes: it loads every
        RawReplicateCounts lane of this run for the given transcripts, adds
        the lanes together position by position (separately for each
        ``minusplus_id``), adds one RawReactivities row per transcript to the
        session and commits.

        If a transcript only has "minus" lanes or only has "plus" lanes, the
        missing side is stored as zeros of the same length as the present one.

        Args:
            tids: transcript IDs whose replicate lanes should be summed.

        Raises:
            IndexError: if a lane holds more values than the first lane seen
                for the same transcript and ``minusplus_id``.
            KeyError: if a transcript has lanes but none of them is labelled
                "minus" or "plus".
        """
        # fetch raw replicate lanes data
        lanes = db_session \
            .query(RawReplicateCounts) \
            .filter(
                RawReplicateCounts.nucleotide_measurement_run_id==self.nucleotide_measurement_run_id,
                RawReplicateCounts.transcript_id.in_(tids)
            ) \
            .order_by(
                RawReplicateCounts.transcript_id,
                RawReplicateCounts.minusplus_id,
                RawReplicateCounts.bio_replicate_id,
                RawReplicateCounts.tech_replicate_id
            ) \
            .all()

        # compile into counts
        counts = {} # transcript_id => {minusplus_id => summed per-position counts}
        for lane in lanes:
            lane_values = values_str_unpack_float(lane.values)

            # initialise this transcript on first sight
            per_transcript = counts.setdefault(lane.transcript_id, {})

            if lane.minusplus_id in per_transcript:
                # add to existing plus or minus counts, in place
                running = per_transcript[lane.minusplus_id]
                for pos, value in enumerate(lane_values):
                    running[pos] += value
            else:
                # first lane for this label becomes the running total
                per_transcript[lane.minusplus_id] = lane_values

        # insert the counts into the DB
        for transcript_id, transcript_counts in counts.items():

            # gotta handle the missing data gracefully: zero-fill the absent side
            if "minus" not in transcript_counts:
                minus_counts = [0] * len(transcript_counts["plus"])
            else:
                minus_counts = transcript_counts["minus"]

            if "plus" not in transcript_counts:
                plus_counts = [0] * len(transcript_counts["minus"])
            else:
                plus_counts = transcript_counts["plus"]

            db_session.add(RawReactivities(
                nucleotide_measurement_run_id=self.nucleotide_measurement_run_id,
                transcript_id=transcript_id,
                minus_values="\t".join(map(str, minus_counts)),
                plus_values="\t".join(map(str, plus_counts))
            ))
        db_session.commit()
Esempio n. 3
0
    def get_normalised(self):
        """Return this transcript's measurements as tab-separated text.

        One line is produced per nucleotide of the transcript sequence:
        the 1-based position, the nucleotide and the measurement, separated
        by tabs and terminated by a newline. A measurement that unpacks to
        None, or a position past the end of the stored values, is written
        as "NA".

        Only the first NucleotideMeasurementSet matching this run and
        transcript is used.

        Returns:
            str: the assembled text (empty if the sequence is empty).

        Raises:
            IndexError: if no NucleotideMeasurementSet row matches.
        """
        # Grab sequence string
        seq_str = Transcript(self.transcript_id).get_sequence_str()

        # Use the ORM to grab all the normalised stuff
        results = db_session \
            .query(NucleotideMeasurementSet) \
            .filter(
                NucleotideMeasurementSet.nucleotide_measurement_run_id==self.nucleotide_measurement_run_id,
                NucleotideMeasurementSet.transcript_id==self.transcript_id
            ) \
            .all()

        measurement_set = results[0]
        # TODO detect whether float or int and use the correct unpacker.
        # Needed for raw count values download option
        unpacked = values_str_unpack_float(measurement_set.values)
        n_values = len(unpacked)

        # build the output lines, then join once instead of repeated +=
        lines = []
        for n, nuc in enumerate(seq_str):
            value = unpacked[n] if n < n_values else None
            measurement = "NA" if value is None else value
            lines.append(str(n + 1) + "\t" + nuc + "\t" + str(measurement) + "\n")

        return "".join(lines)
Esempio n. 4
0
    def get_normalised(self):
        """Return this transcript's measurements as tab-separated text.

        One line is produced per nucleotide of the transcript sequence:
        the 1-based position, the nucleotide and the measurement, separated
        by tabs and terminated by a newline. A measurement that unpacks to
        None, or a position past the end of the stored values, is written
        as "NA".

        Only the first NucleotideMeasurementSet matching this run and
        transcript is used.

        Returns:
            str: the assembled text (empty if the sequence is empty).

        Raises:
            IndexError: if no NucleotideMeasurementSet row matches.
        """
        # Grab sequence string
        seq_str = Transcript(self.transcript_id).get_sequence_str()

        # Use the ORM to grab all the normalised stuff
        results = db_session \
            .query(NucleotideMeasurementSet) \
            .filter(
                NucleotideMeasurementSet.nucleotide_measurement_run_id==self.nucleotide_measurement_run_id,
                NucleotideMeasurementSet.transcript_id==self.transcript_id
            ) \
            .all()

        measurement_set = results[0]
        # TODO detect whether float or int and use the correct unpacker.
        # Needed for raw count values download option
        unpacked = values_str_unpack_float(measurement_set.values)
        value_count = len(unpacked)

        # build the output string: collect the lines and join them once
        out = []
        for index, nuc in enumerate(seq_str):
            if index < value_count and unpacked[index] is not None:
                measurement = unpacked[index]
            else:
                # no stored value for this nucleotide
                measurement = "NA"
            out.append(str(index + 1) + "\t" + nuc + "\t" + str(measurement) + "\n")

        return "".join(out)
Esempio n. 5
0
    def build_entries(self, experiment_ids):
        """Build per-experiment nucleotide measurement data and store it as JSON.

        For every NucleotideMeasurementRun in ``experiment_ids`` an entry with
        ``id``, ``description`` and a ``data`` list is created; ``data`` has
        one ``{"position", "nuc", "measurement"}`` dict per nucleotide of this
        transcript's sequence, with 0-based positions. Measurements from the
        matching NucleotideMeasurementSet rows are then filled in.

        An experiment whose measurements are all None or 0 has its ``data``
        list removed and ``empty`` set to True; otherwise ``empty`` is False.

        Side effects:
            self.empty: True when no experiment has any non-zero measurement.
            self.data_json: JSON dump of the entries, keyed by experiment ID.

        Args:
            experiment_ids: IDs of the NucleotideMeasurementRun rows to include.

        Raises:
            KeyError: if a measurement set belongs to a run that was not loaded.
            IndexError: if a measurement set has more values than the sequence
                has nucleotides.
        """
        # NOTE(review): function-scope import, presumably to avoid a circular
        # import with `models` - confirm before moving it to module level.
        from models import NucleotideMeasurementRun

        # Load experiments
        experiments = db_session \
            .query(NucleotideMeasurementRun) \
            .filter(NucleotideMeasurementRun.id.in_(experiment_ids)) \
            .all()

        # Load measurements
        seq_str = str(
            Transcript(self.transcript_id).get_sequence(self.strain_id).seq)
        measurements_data = db_session \
            .query(NucleotideMeasurementSet) \
            .filter(
                NucleotideMeasurementSet.nucleotide_measurement_run_id.in_(experiment_ids),
                NucleotideMeasurementSet.transcript_id==self.transcript_id
            ) \
            .all()

        # Populate experiment rows, one placeholder per nucleotide
        data = {}
        for experiment in experiments:
            data[experiment.id] = {
                "id": experiment.id,
                "description": experiment.description,
                "data": [
                    {"position": n, "nuc": nuc, "measurement": None}
                    for n, nuc in enumerate(seq_str)
                ]
            }

        # Add measurements to each experiment json element
        # Loop since we might be dealing with > 1 measurement set
        for measurement_set in measurements_data:
            experiment_id = measurement_set.nucleotide_measurement_run_id
            measurements = values_str_unpack_float(measurement_set.values)

            for pos, measurement in enumerate(measurements):
                data[experiment_id]["data"][pos]["measurement"] = measurement

        # For each experiment, check whether there is no data and set empty flags accordingly.
        self.empty = True  # all empty flag
        for entry in data.values():
            has_data = any(
                row["measurement"] is not None and row["measurement"] != 0
                for row in entry["data"]
            )

            if has_data:
                entry["empty"] = False
                self.empty = False
            else:
                del entry["data"]
                entry["empty"] = True

        self.data_json = json.dumps(data)
Esempio n. 6
0
    def build_entries(self, experiment_ids):
        """Build per-experiment nucleotide measurement data and store it as JSON.

        For every NucleotideMeasurementRun in ``experiment_ids`` an entry with
        ``id``, ``description`` and a ``data`` list is created; ``data`` has
        one ``{"position", "nuc", "measurement"}`` dict per nucleotide of this
        transcript's sequence, with 0-based positions. Measurements from the
        matching NucleotideMeasurementSet rows are then filled in.

        An experiment whose measurements are all None or 0 has its ``data``
        list removed and ``empty`` set to True; otherwise ``empty`` is False.

        Side effects:
            self.empty: True when no experiment has any non-zero measurement.
            self.data_json: JSON dump of the entries, keyed by experiment ID.

        Args:
            experiment_ids: IDs of the NucleotideMeasurementRun rows to include.

        Raises:
            KeyError: if a measurement set belongs to a run that was not loaded.
            IndexError: if a measurement set has more values than the sequence
                has nucleotides.
        """
        # NOTE(review): function-scope import, presumably to avoid a circular
        # import with `models` - confirm before moving it to module level.
        from models import NucleotideMeasurementRun

        # Load experiments
        experiments = db_session \
            .query(NucleotideMeasurementRun) \
            .filter(NucleotideMeasurementRun.id.in_(experiment_ids)) \
            .all()

        # Load measurements
        seq_str = str(Transcript(self.transcript_id).get_sequence(self.strain_id).seq)
        measurements_data = db_session \
            .query(NucleotideMeasurementSet) \
            .filter(
                NucleotideMeasurementSet.nucleotide_measurement_run_id.in_(experiment_ids),
                NucleotideMeasurementSet.transcript_id==self.transcript_id
            ) \
            .all()

        data = {}

        # Populate experiment rows
        for experiment in experiments:
            # initialise the array with one unmeasured slot per nucleotide
            placeholders = [
                {"position": n, "nuc": nuc, "measurement": None}
                for n, nuc in enumerate(seq_str)
            ]
            data[experiment.id] = {
                "id": experiment.id,
                "description": experiment.description,
                "data": placeholders
            }

        # Add measurements to each experiment json element
        # Loop since we might be dealing with > 1 measurement set
        for measurement_set in measurements_data:
            experiment_id = measurement_set.nucleotide_measurement_run_id
            measurements = values_str_unpack_float(measurement_set.values)

            for pos, measurement in enumerate(measurements):
                data[experiment_id]["data"][pos]["measurement"] = measurement

        # For each experiment, check whether there is no data and set empty flags accordingly.
        self.empty = True # all empty flag
        for entry in data.values():
            # stop scanning at the first real (non-None, non-zero) measurement
            empty = not any(
                slot["measurement"] is not None and slot["measurement"] != 0
                for slot in entry["data"]
            )

            if empty:
                del entry["data"]
                entry["empty"] = True
            else:
                entry["empty"] = False
                self.empty = False

        self.data_json = json.dumps(data)