def test_simple_array(self):
    """
    Intersect two arrays of single characters and check the returned
    index arrays, also with the arguments swapped.
    """
    left = np.array(list('axzbykcq'))
    right = np.array(list('12ab34'))
    # 'a' and 'b' are the only items present in both arrays.
    left_idx, right_idx = np_ext.index_intersect(left, right)
    self.assertEqual(left_idx.tolist(), [0, 3])
    self.assertEqual(right_idx.tolist(), [2, 3])
    # Swapping the inputs is expected to swap the two index arrays.
    swapped_right_idx, swapped_left_idx = np_ext.index_intersect(right, left)
    self.assertTrue(np.array_equal(swapped_left_idx, left_idx))
    self.assertTrue(np.array_equal(swapped_right_idx, right_idx))
def test_performance(self):
    """
    Time index_intersect on two large arrays of string ids and print
    the elapsed wall-clock time.  Nothing is asserted on the result.
    """
    size = 1000000
    offset = size // 10000
    # First array: ids offset..size-1; second array: ids 0..offset+size-1.
    ids_a = np.array(['V%07d' % n for n in xrange(offset, size)])
    ids_b = np.array(['V%07d' % n for n in xrange(offset + size)])
    start = time.time()
    idx_a, idx_b = np_ext.index_intersect(ids_a, ids_b)
    # Bare print statement (Python 2): emits an empty line.
    print
    elapsed = time.time() - start
    print("finished in %.1f s" % elapsed)
def test_performance(self):
    """
    Time index_intersect on two large arrays of string ids and print
    the elapsed wall-clock time.  Nothing is asserted on the result.
    """
    size = 1000000
    offset = size // 10000
    # First array: ids offset..size-1; second array: ids 0..offset+size-1.
    ids_a = np.array(['V%07d' % n for n in xrange(offset, size)])
    ids_b = np.array(['V%07d' % n for n in xrange(offset + size)])
    start = time.time()
    idx_a, idx_b = np_ext.index_intersect(ids_a, ids_b)
    # Bare print statement (Python 2): emits an empty line.
    print
    elapsed = time.time() - start
    print("finished in %.1f s" % elapsed)
def test_record_array(self):
    """
    Intersect the vid columns of two record arrays and check that the
    returned indices select the matching records from each array.
    """
    marker_dtype = [('marker_vid', '|S34'), ('allele_flip', '|i1')]
    label_dtype = [('vid', '|S34'), ('label', '|S48')]
    markers = np.array(
        [('V002', 0), ('V068', 1), ('V129', 0)], dtype=marker_dtype
    )
    labels = np.array(
        [('V%03d' % n, 'foo-%03d' % n) for n in xrange(1000)],
        dtype=label_dtype
    )
    marker_idx, label_idx = np_ext.index_intersect(
        markers['marker_vid'], labels['vid']
    )
    # Every marker is found; its position in labels equals its number.
    self.assertEqual(marker_idx.tolist(), [0, 1, 2])
    self.assertEqual(label_idx.tolist(), [2, 68, 129])
    expected_markers = [('V002', 0), ('V068', 1), ('V129', 0)]
    self.assertEqual(markers[marker_idx].tolist(), expected_markers)
    expected_labels = [
        ('V002', 'foo-002'),
        ('V068', 'foo-068'),
        ('V129', 'foo-129'),
    ]
    self.assertEqual(labels[label_idx].tolist(), expected_labels)
def test_record_array(self):
    """
    Intersect the vid columns of two record arrays and check that the
    returned indices select the matching records from each array.
    """
    marker_dtype = [('marker_vid', '|S34'), ('allele_flip', '|i1')]
    label_dtype = [('vid', '|S34'), ('label', '|S48')]
    markers = np.array(
        [('V002', 0), ('V068', 1), ('V129', 0)], dtype=marker_dtype
    )
    labels = np.array(
        [('V%03d' % n, 'foo-%03d' % n) for n in xrange(1000)],
        dtype=label_dtype
    )
    marker_idx, label_idx = np_ext.index_intersect(
        markers['marker_vid'], labels['vid']
    )
    # Every marker is found; its position in labels equals its number.
    self.assertEqual(marker_idx.tolist(), [0, 1, 2])
    self.assertEqual(label_idx.tolist(), [2, 68, 129])
    expected_markers = [('V002', 0), ('V068', 1), ('V129', 0)]
    self.assertEqual(markers[marker_idx].tolist(), expected_markers)
    expected_labels = [
        ('V002', 'foo-002'),
        ('V068', 'foo-068'),
        ('V129', 'foo-129'),
    ]
    self.assertEqual(labels[label_idx].tolist(), expected_labels)
def load_markers(self, batch_size=1000, additional_fields=None):
    """
    Read marker info from the marker set table and store it in the
    markers attribute.

    If additional_fields is provided, it must be a list of fields from
    the marker definition table; in this case, the additional info is
    stored in the add_marker_info attribute.  The "vid" field is always
    requested as well, because it is used to line the definitions up
    with the markers.  The list passed by the caller is not modified.

    A KeyError is raised if a marker of the set has no matching record
    among the fetched definitions.
    """
    data = self.proxy.gadpt.read_snp_markers_set(self.id,
                                                 batch_size=batch_size)
    self.__set_markers(data)
    if additional_fields is None:
        return
    # Work on a copy: appending "vid" to the caller's own list would
    # silently change it behind the caller's back.
    col_names = list(additional_fields)
    if "vid" not in col_names:
        col_names.append("vid")
    recs = self.proxy.get_snp_marker_definitions(col_names=col_names,
                                                 batch_size=batch_size)
    # Only the indices into the definitions are needed here.
    _, def_idx = np_ext.index_intersect(data['marker_vid'], recs['vid'])
    recs = recs[def_idx]
    # FIXME: this is not very efficient
    # Rebuild the definitions in the same order as the markers in data.
    by_vid = dict((r['vid'], r) for r in recs)
    recs = np.array([by_vid[d['marker_vid']] for d in data],
                    dtype=recs.dtype)
    self.__set_add_marker_info(recs)
def intersect(mset1, mset2):
    """
    Find the markers of two aligned marker sets that share a position.

    Returns a pair of equal length numpy arrays where corresponding
    array elements are the indices of markers, respectively in mset1
    and mset2, that align to the same position on the same ref_genome.

    Raises ValueError if either marker set has no alignments loaded or
    if the two sets are aligned to different reference genomes.

    .. code-block:: python

      ref_genome = 'hg19'
      ms1.load_alignments(ref_genome)
      ms2.load_alignments(ref_genome)
      idx1, idx2 = kb.SNPMarkersSet.intersect(ms1, ms2)
      for i1, i2 in it.izip(idx1, idx2):
        assert ms1[i1].position == ms2[i2].position

    """
    if not mset1.has_aligns() or not mset2.has_aligns():
        raise ValueError('both mset should be aligned')
    same_genome = mset1.ref_genome == mset2.ref_genome
    if not same_genome:
        raise ValueError('msets should be aligned to the same ref_genome')
    # Markers are matched on the 'global_pos' column of the alignments.
    positions1 = mset1.aligns['global_pos']
    positions2 = mset2.aligns['global_pos']
    return np_ext.index_intersect(positions1, positions2)
def load_markers(self, batch_size=1000, additional_fields=None):
    """
    Read marker info from the marker set table and store it in the
    markers attribute.

    If additional_fields is provided, it must be a list of fields from
    the marker definition table; in this case, the additional info is
    stored in the add_marker_info attribute.  The "vid" field is always
    requested as well, because it is used to line the definitions up
    with the markers.  The list passed by the caller is not modified.

    A KeyError is raised if a marker of the set has no matching record
    among the fetched definitions.
    """
    data = self.proxy.gadpt.read_snp_markers_set(self.id,
                                                 batch_size=batch_size)
    self.__set_markers(data)
    if additional_fields is None:
        return
    # Work on a copy: appending "vid" to the caller's own list would
    # silently change it behind the caller's back.
    col_names = list(additional_fields)
    if "vid" not in col_names:
        col_names.append("vid")
    recs = self.proxy.get_snp_marker_definitions(col_names=col_names,
                                                 batch_size=batch_size)
    # Only the indices into the definitions are needed here.
    _, def_idx = np_ext.index_intersect(data['marker_vid'], recs['vid'])
    recs = recs[def_idx]
    # FIXME: this is not very efficient
    # Rebuild the definitions in the same order as the markers in data.
    by_vid = dict((r['vid'], r) for r in recs)
    recs = np.array([by_vid[d['marker_vid']] for d in data],
                    dtype=recs.dtype)
    self.__set_add_marker_info(recs)