def write_block(data_block, plate_barcode, out_dir, snp_name_to_label, header, requested_sample_ids):
    """Write the SNP calls of one data block to ``<out_dir>/<sample_id>.ssc``.

    The sample id is the result of
    ``adjust_immuno_sample_id(data_block.sample_id, plate_barcode)``.  When
    that id is not in ``requested_sample_ids`` nothing is written.

    Side effect: ``header`` is modified in place; its ``"sample_id"`` entry
    is set to the adjusted sample id before it is passed to the writer.

    Returns a ``(sample_id, out_fn)`` pair; ``out_fn`` is ``None`` when the
    block was skipped.

    Lookup failures in ``snp_name_to_label`` or ``WEIGHTS`` and ``float()``
    conversion errors propagate to the caller; the output stream is closed
    before they do.
    """
    sample_id = adjust_immuno_sample_id(data_block.sample_id, plate_barcode)
    if sample_id not in requested_sample_ids:
        return sample_id, None

    out_fn = os.path.join(out_dir, "%s.ssc" % sample_id)
    header["sample_id"] = sample_id
    stream = MessageStreamWriter(out_fn, PAYLOAD_MSG_TYPE, header)
    # try/finally guarantees the stream is closed even if a record fails.
    try:
        for k in data_block.snp_names():
            snp = data_block.snp[k]
            snp_id = snp_name_to_label[snp["SNP Name"]]
            call = "%s%s" % (snp["Allele1 - AB"], snp["Allele2 - AB"])
            w_aa, w_ab, w_bb = WEIGHTS[call]
            # GC Score = 0.0 ==> call = '--'
            stream.write(
                {
                    "sample_id": sample_id,
                    "snp_id": snp_id,
                    # Call strings that are not SnpCall attributes map to NOCALL.
                    "call": getattr(SnpCall, call, SnpCall.NOCALL),
                    # Open question from the original author:
                    # 1-gc_score? 1/gc_score?
                    "confidence": float(snp["GC Score"]),
                    "sig_A": float(snp["X Raw"]),
                    "sig_B": float(snp["Y Raw"]),
                    "w_AA": w_aa,
                    "w_AB": w_ab,
                    "w_BB": w_bb,
                }
            )
    finally:
        stream.close()
    return sample_id, out_fn
def write_block(data_block, plate_barcode, out_dir, snp_name_to_label, header, requested_sample_ids):
    """Dump one data block's SNP records into an ``.ssc`` file under ``out_dir``.

    The file is named after the sample id returned by
    ``adjust_immuno_sample_id(data_block.sample_id, plate_barcode)``.  If
    that id is not among ``requested_sample_ids`` the block is skipped and
    no file is created.

    Side effect: ``header['sample_id']`` is overwritten on the dict passed
    in by the caller.

    Returns ``(sample_id, out_fn)``; ``out_fn`` is ``None`` for a skipped
    block.

    Any error raised while building a record (failed lookup in
    ``snp_name_to_label`` or ``WEIGHTS``, bad float conversion) is
    propagated after the output stream has been closed.
    """
    sample_id = adjust_immuno_sample_id(data_block.sample_id, plate_barcode)
    if sample_id not in requested_sample_ids:
        return sample_id, None

    out_fn = os.path.join(out_dir, '%s.ssc' % sample_id)
    header['sample_id'] = sample_id
    stream = MessageStreamWriter(out_fn, PAYLOAD_MSG_TYPE, header)
    # Close the stream on every exit path, including mid-loop failures.
    try:
        for k in data_block.snp_names():
            snp = data_block.snp[k]
            snp_id = snp_name_to_label[snp['SNP Name']]
            call = '%s%s' % (snp['Allele1 - AB'], snp['Allele2 - AB'])
            w_aa, w_ab, w_bb = WEIGHTS[call]
            # GC Score = 0.0 ==> call = '--'
            stream.write({
                'sample_id': sample_id,
                'snp_id': snp_id,
                # Falls back to NOCALL when SnpCall has no such attribute.
                'call': getattr(SnpCall, call, SnpCall.NOCALL),
                # Open question from the original author:
                # 1-gc_score? 1/gc_score?
                'confidence': float(snp['GC Score']),
                'sig_A': float(snp['X Raw']),
                'sig_B': float(snp['Y Raw']),
                'w_AA': w_aa,
                'w_AB': w_ab,
                'w_BB': w_bb,
            })
    finally:
        stream.close()
    return sample_id, out_fn
def make_fake_ssc(mset, sample_id, probs, conf, fn):
    """Write a synthetic SSC file ``fn`` for ``sample_id``.

    One record is written per entry of ``mset.add_marker_info['label']``,
    iterated in lockstep with ``probs[0]`` (used as p_AA), ``probs[1]``
    (used as p_BB) and ``conf``; iteration stops at the shortest of these.
    For each record p_AB is derived as ``1.0 - (p_AA + p_BB)``.

    ``sig_A``/``sig_B`` carry p_AA/p_BB and ``w_AA``/``w_AB``/``w_BB`` carry
    the three probabilities.  ``call`` is always ``SnpCall.NOCALL``.

    The stream is closed even if building or writing a record fails.
    """
    header = {'markers_set': mset.label, 'sample_id': sample_id}
    stream = MessageStreamWriter(fn, PAYLOAD_MSG_TYPE, header)
    try:
        labels = mset.add_marker_info['label']
        for label, p_AA, p_BB, c in it.izip(labels, probs[0], probs[1], conf):
            p_AB = 1.0 - (p_AA + p_BB)
            stream.write({
                'sample_id': sample_id,
                'snp_id': label,
                'call': SnpCall.NOCALL,  # we will not test this anyway
                'confidence': float(c),
                'sig_A': float(p_AA),
                'sig_B': float(p_BB),
                'w_AA': float(p_AA),
                'w_AB': float(p_AB),
                'w_BB': float(p_BB),
            })
    finally:
        stream.close()
def make_data_object(sample_id, fname, ms):
    """Write a randomly generated SampleSnpCall stream to ``fname``.

    The header uses the fixed device id ``'FAKE'`` and the current time (ISO
    format) for both ``min_datetime`` and ``max_datetime``.  For every
    marker in ``ms.markers`` one record is written whose call, signals and
    weights are picked uniformly from three fixed AA / AB / BB templates.
    ``confidence`` is always ``0.0``.

    The stream is closed even if writing a record fails.
    """
    payload_msg_type = 'core.gt.messages.SampleSnpCall'
    header = {
        'device_id': 'FAKE',
        'sample_id': sample_id,
        'min_datetime': datetime.now().isoformat(),
        'max_datetime': datetime.now().isoformat(),
    }
    # (call, sig_A, sig_B, weight_AA, weight_AB, weight_BB) templates;
    # built once instead of on every loop iteration.
    templates = (
        (SnpCall.AA, 1.0, 0.0, 1.0, 0.0, 0.0),
        (SnpCall.AB, 1.0, 1.0, 0.0, 1.0, 0.0),
        (SnpCall.BB, 0.0, 1.0, 0.0, 0.0, 1.0),
    )
    stream = MessageStreamWriter(fname, payload_msg_type, header)
    try:
        for m in ms.markers:
            (call, sig_A, sig_B,
             weight_AA, weight_AB, weight_BB) = random.choice(templates)
            stream.write({
                'sample_id': sample_id,
                'snp_id': m.id,
                'call': call,
                'confidence': 0.0,
                'sig_A': sig_A,
                'sig_B': sig_B,
                'weight_AA': weight_AA,
                'weight_AB': weight_AB,
                'weight_BB': weight_BB,
            })
    finally:
        stream.close()
def write_ssc_data_set_file(fname, found_markers, device_id, sample_id, min_datetime, max_datetime, data):
    """Write the records in ``data`` to ``fname`` as a SampleSnpCall stream.

    The header carries ``device_id``, ``sample_id`` and the string forms of
    ``min_datetime`` / ``max_datetime``.  Each item ``d`` of ``data`` is
    matched to a marker via ``found_markers[d['Marker Name']]['kb_marker']``;
    the marker supplies ``snp_id`` (``m.id``) and the mask passed to
    ``canonize_call`` together with ``d['Call']``.

    A failed lookup in ``found_markers`` or in a data record propagates to
    the caller; the stream is closed before it does.
    """
    payload_msg_type = 'core.gt.messages.SampleSnpCall'
    header = {
        'device_id': device_id,
        'sample_id': sample_id,
        'min_datetime': '%s' % min_datetime,
        'max_datetime': '%s' % max_datetime,
    }
    stream = MessageStreamWriter(fname, payload_msg_type, header)
    try:
        for d in data:
            m = found_markers[d['Marker Name']]['kb_marker']
            allele_x_rn = d['Allele X Rn']
            passive_ref = d['Passive Ref']
            # NOTE(review): sig_B and w_BB are derived from 'Allele X Rn',
            # exactly like sig_A and w_AA.  This looks like a copy-paste
            # slip (a separate B-allele column may have been intended), but
            # the input schema is not visible here -- confirm before changing.
            stream.write({
                'sample_id': sample_id,
                'snp_id': m.id,
                'call': canonize_call(m.mask, d['Call']),
                'confidence': d['Quality Value'],
                'sig_A': allele_x_rn * passive_ref,
                'sig_B': allele_x_rn * passive_ref,
                'w_AA': allele_x_rn,
                'w_AB': 0.0,
                'w_BB': allele_x_rn,
            })
    finally:
        stream.close()