def fetch_raw_replicate_counts(self, tids):
    """Sum raw replicate lane counts per transcript and store the totals.

    Loads every RawReplicateCounts row belonging to this object's
    ``nucleotide_measurement_run_id`` whose transcript is in ``tids``,
    adds the unpacked lane values position by position for each
    (transcript_id, minusplus_id) pair, then adds one RawReactivities row
    per transcript to the session and commits.

    Despite the name, nothing is returned; the result is written through
    ``db_session``.

    :param tids: transcript IDs to process (passed to an SQL ``IN`` filter).
    :raises IndexError: if a later lane for the same transcript/minusplus
        pair has more values than the first lane seen for that pair.
    :raises KeyError: if a transcript has neither a "minus" nor a "plus"
        entry (i.e. only some other ``minusplus_id`` value).
    """
    # fetch raw replicate lanes data
    lanes = (
        db_session.query(RawReplicateCounts)
        .filter(
            RawReplicateCounts.nucleotide_measurement_run_id
            == self.nucleotide_measurement_run_id,
            RawReplicateCounts.transcript_id.in_(tids),
        )
        .order_by(
            RawReplicateCounts.transcript_id,
            RawReplicateCounts.minusplus_id,
            RawReplicateCounts.bio_replicate_id,
            RawReplicateCounts.tech_replicate_id,
        )
        .all()
    )

    # compile into counts
    # transcript_id => {minusplus_id => summed per-position values}
    counts = {}
    for lane in lanes:
        lane_values = values_str_unpack_float(lane.values)

        # initialise this transcript on first sight
        transcript_counts = counts.setdefault(lane.transcript_id, {})

        if lane.minusplus_id not in transcript_counts:
            # first lane for this plus/minus slot: its values seed the totals
            transcript_counts[lane.minusplus_id] = lane_values
        else:
            # add to existing plus or minus counts, in place
            totals = transcript_counts[lane.minusplus_id]
            for pos, value in enumerate(lane_values):
                totals[pos] += value

    # insert the counts into the DB
    for transcript_id, transcript_counts in counts.items():
        # gotta handle the missing data gracefully: a missing side is
        # replaced by zeros of the same length as the side that is present
        if "minus" in transcript_counts:
            minus_counts = transcript_counts["minus"]
        else:
            minus_counts = [0] * len(transcript_counts["plus"])

        if "plus" in transcript_counts:
            plus_counts = transcript_counts["plus"]
        else:
            plus_counts = [0] * len(transcript_counts["minus"])

        measurement_set = RawReactivities(
            nucleotide_measurement_run_id=self.nucleotide_measurement_run_id,
            transcript_id=transcript_id,
            minus_values="\t".join(map(str, minus_counts)),
            plus_values="\t".join(map(str, plus_counts)),
        )
        db_session.add(measurement_set)

    # NOTE(review): one commit for the whole batch — confirm this matches
    # the intended transaction granularity.
    db_session.commit()
def fetch_raw_replicate_counts(self, tids):
    """Sum raw replicate lane counts per transcript and store the totals.

    Loads every RawReplicateCounts row belonging to this object's
    ``nucleotide_measurement_run_id`` whose transcript is in ``tids``,
    adds the unpacked lane values position by position for each
    (transcript_id, minusplus_id) pair, then adds one RawReactivities row
    per transcript to the session and commits.

    Despite the name, nothing is returned; the result is written through
    ``db_session``.

    :param tids: transcript IDs to process (passed to an SQL ``IN`` filter).
    :raises IndexError: if a later lane for the same transcript/minusplus
        pair has more values than the first lane seen for that pair.
    :raises KeyError: if a transcript has neither a "minus" nor a "plus"
        entry (i.e. only some other ``minusplus_id`` value).
    """
    # fetch raw replicate lanes data
    lanes = (
        db_session.query(RawReplicateCounts)
        .filter(
            RawReplicateCounts.nucleotide_measurement_run_id
            == self.nucleotide_measurement_run_id,
            RawReplicateCounts.transcript_id.in_(tids),
        )
        .order_by(
            RawReplicateCounts.transcript_id,
            RawReplicateCounts.minusplus_id,
            RawReplicateCounts.bio_replicate_id,
            RawReplicateCounts.tech_replicate_id,
        )
        .all()
    )

    # compile into counts
    # transcript_id => {minusplus_id => summed per-position values}
    counts = {}
    for lane in lanes:
        lane_values = values_str_unpack_float(lane.values)

        # initialise this transcript on first sight
        transcript_counts = counts.setdefault(lane.transcript_id, {})

        if lane.minusplus_id not in transcript_counts:
            # first lane for this plus/minus slot: its values seed the totals
            transcript_counts[lane.minusplus_id] = lane_values
        else:
            # add to existing plus or minus counts, in place
            totals = transcript_counts[lane.minusplus_id]
            for pos, value in enumerate(lane_values):
                totals[pos] += value

    # insert the counts into the DB
    for transcript_id, transcript_counts in counts.items():
        # gotta handle the missing data gracefully: a missing side is
        # replaced by zeros of the same length as the side that is present
        if "minus" in transcript_counts:
            minus_counts = transcript_counts["minus"]
        else:
            minus_counts = [0] * len(transcript_counts["plus"])

        if "plus" in transcript_counts:
            plus_counts = transcript_counts["plus"]
        else:
            plus_counts = [0] * len(transcript_counts["minus"])

        measurement_set = RawReactivities(
            nucleotide_measurement_run_id=self.nucleotide_measurement_run_id,
            transcript_id=transcript_id,
            minus_values="\t".join(map(str, minus_counts)),
            plus_values="\t".join(map(str, plus_counts)),
        )
        db_session.add(measurement_set)

    # NOTE(review): one commit for the whole batch — confirm this matches
    # the intended transaction granularity.
    db_session.commit()
def get_normalised(self):
    """Return this transcript's measurements as tab-separated text.

    The result has one line per character of the transcript's sequence
    string, in the form ``<1-based position>\\t<nucleotide>\\t<measurement>\\n``.
    The measurement column is ``"NA"`` where the unpacked value is ``None``
    or where the measurement list is shorter than the sequence.

    :returns: the assembled text (empty string for an empty sequence).
    :raises IndexError: from ``results[0]`` when no NucleotideMeasurementSet
        matches this run and transcript.
    """
    # Grab sequence string
    seq_str = Transcript(self.transcript_id).get_sequence_str()

    # Use the ORM to grab all the normalised stuff
    results = (
        db_session.query(NucleotideMeasurementSet)
        .filter(
            NucleotideMeasurementSet.nucleotide_measurement_run_id
            == self.nucleotide_measurement_run_id,
            NucleotideMeasurementSet.transcript_id == self.transcript_id,
        )
        .all()
    )
    # only the first matching set is used
    measurement_set = results[0]

    # TODO detect whether float or int and use the correct unpacker.
    # Needed for raw count values download option
    unpacked = values_str_unpack_float(measurement_set.values)
    n_values = len(unpacked)

    # build the output, one row per sequence position; rows are joined once
    # at the end instead of growing a string inside the loop
    rows = []
    for index, nucleotide in enumerate(seq_str):
        # positions beyond the measured range are reported like None values
        value = unpacked[index] if index < n_values else None
        measurement = "NA" if value is None else value
        rows.append(
            str(index + 1) + "\t" + nucleotide + "\t" + str(measurement) + "\n"
        )
    return "".join(rows)
def build_entries(self, experiment_ids):
    """Build per-experiment measurement entries and store them as JSON.

    For every NucleotideMeasurementRun whose id is in ``experiment_ids`` an
    entry ``{"id", "description", "data"}`` is created, where ``data`` holds
    one ``{"position", "nuc", "measurement"}`` dict per sequence character
    (``position`` is the 0-based index). Measurements from the matching
    NucleotideMeasurementSet rows for this transcript are then filled in.

    An experiment whose measurements are all ``None`` or ``0`` has its
    ``"data"`` list removed and ``"empty"`` set to ``True``; otherwise
    ``"empty"`` is ``False``.

    Side effects: sets ``self.empty`` (``True`` only if every experiment is
    empty, including when there are no experiments) and ``self.data_json``
    (the JSON dump of the entries keyed by experiment id). Returns nothing.

    :param experiment_ids: ids of the measurement runs to include.
    :raises KeyError: if a measurement set refers to a run id for which no
        NucleotideMeasurementRun was loaded.
    :raises IndexError: if a measurement set has more values than the
        sequence has characters.
    """
    # local import kept as in the original
    # NOTE(review): presumably avoids a circular import — confirm.
    from models import NucleotideMeasurementRun

    # Load experiments
    experiments = (
        db_session.query(NucleotideMeasurementRun)
        .filter(NucleotideMeasurementRun.id.in_(experiment_ids))
        .all()
    )

    # Load measurements
    seq_str = str(
        Transcript(self.transcript_id).get_sequence(self.strain_id).seq
    )
    measurements_data = (
        db_session.query(NucleotideMeasurementSet)
        .filter(
            NucleotideMeasurementSet.nucleotide_measurement_run_id.in_(
                experiment_ids
            ),
            NucleotideMeasurementSet.transcript_id == self.transcript_id,
        )
        .all()
    )

    # Populate experiment rows, every position starting without a measurement
    data = {}
    for experiment in experiments:
        data[experiment.id] = {
            "id": experiment.id,
            "description": experiment.description,
            "data": [
                {"position": n, "nuc": nuc, "measurement": None}
                for n, nuc in enumerate(seq_str)
            ],
        }

    # Add measurements to each experiment json element
    # Loop since we might be dealing with > 1 measurement set
    for measurement_set in measurements_data:
        experiment_id = measurement_set.nucleotide_measurement_run_id
        points = data[experiment_id]["data"]
        measurements = values_str_unpack_float(measurement_set.values)
        for pos, measurement in enumerate(measurements):
            points[pos]["measurement"] = measurement

    # For each experiment, check whether there is no data and set empty
    # flags accordingly.
    self.empty = True  # all empty flag
    for entry in data.values():
        # any() stops at the first position that carries a real measurement
        has_data = any(
            point["measurement"] is not None and point["measurement"] != 0
            for point in entry["data"]
        )
        if has_data:
            entry["empty"] = False
            self.empty = False
        else:
            del entry["data"]
            entry["empty"] = True

    self.data_json = json.dumps(data)
def build_entries(self, experiment_ids):
    """Build per-experiment measurement entries and store them as JSON.

    For every NucleotideMeasurementRun whose id is in ``experiment_ids`` an
    entry ``{"id", "description", "data"}`` is created, where ``data`` holds
    one ``{"position", "nuc", "measurement"}`` dict per sequence character
    (``position`` is the 0-based index). Measurements from the matching
    NucleotideMeasurementSet rows for this transcript are then filled in.

    An experiment whose measurements are all ``None`` or ``0`` has its
    ``"data"`` list removed and ``"empty"`` set to ``True``; otherwise
    ``"empty"`` is ``False``.

    Side effects: sets ``self.empty`` (``True`` only if every experiment is
    empty, including when there are no experiments) and ``self.data_json``
    (the JSON dump of the entries keyed by experiment id). Returns nothing.

    :param experiment_ids: ids of the measurement runs to include.
    :raises KeyError: if a measurement set refers to a run id for which no
        NucleotideMeasurementRun was loaded.
    :raises IndexError: if a measurement set has more values than the
        sequence has characters.
    """
    # local import kept as in the original
    # NOTE(review): presumably avoids a circular import — confirm.
    from models import NucleotideMeasurementRun

    # Load experiments
    experiments = (
        db_session.query(NucleotideMeasurementRun)
        .filter(NucleotideMeasurementRun.id.in_(experiment_ids))
        .all()
    )

    # Load measurements
    seq_str = str(
        Transcript(self.transcript_id).get_sequence(self.strain_id).seq
    )
    measurements_data = (
        db_session.query(NucleotideMeasurementSet)
        .filter(
            NucleotideMeasurementSet.nucleotide_measurement_run_id.in_(
                experiment_ids
            ),
            NucleotideMeasurementSet.transcript_id == self.transcript_id,
        )
        .all()
    )

    # Populate experiment rows, every position starting without a measurement
    data = {}
    for experiment in experiments:
        data[experiment.id] = {
            "id": experiment.id,
            "description": experiment.description,
            "data": [
                {"position": n, "nuc": nuc, "measurement": None}
                for n, nuc in enumerate(seq_str)
            ],
        }

    # Add measurements to each experiment json element
    # Loop since we might be dealing with > 1 measurement set
    for measurement_set in measurements_data:
        experiment_id = measurement_set.nucleotide_measurement_run_id
        points = data[experiment_id]["data"]
        measurements = values_str_unpack_float(measurement_set.values)
        for pos, measurement in enumerate(measurements):
            points[pos]["measurement"] = measurement

    # For each experiment, check whether there is no data and set empty
    # flags accordingly.
    self.empty = True  # all empty flag
    for entry in data.values():
        # any() stops at the first position that carries a real measurement
        has_data = any(
            point["measurement"] is not None and point["measurement"] != 0
            for point in entry["data"]
        )
        if has_data:
            entry["empty"] = False
            self.empty = False
        else:
            del entry["data"]
            entry["empty"] = True

    self.data_json = json.dumps(data)