def test_simple_array(self):
   # Intersect two plain character arrays. The only values they share
   # are 'a' and 'b', so each returned index array has two entries.
   left = np.array(list('axzbykcq'))
   right = np.array(list('12ab34'))
   left_idx, right_idx = np_ext.index_intersect(left, right)
   self.assertEqual(left_idx.tolist(), [0, 3])
   self.assertEqual(right_idx.tolist(), [2, 3])
   # Swapping the operands must swap the two index arrays.
   swapped_right_idx, swapped_left_idx = np_ext.index_intersect(right, left)
   self.assertTrue(np.array_equal(swapped_left_idx, left_idx))
   self.assertTrue(np.array_equal(swapped_right_idx, right_idx))
 def test_simple_array(self):
     # index_intersect on two small character arrays: 'a' and 'b' are
     # the only values present in both.
     chars_a = np.array(list('axzbykcq'))
     chars_b = np.array(list('12ab34'))
     idx_a, idx_b = np_ext.index_intersect(chars_a, chars_b)
     self.assertEqual(idx_a.tolist(), [0, 3])
     self.assertEqual(idx_b.tolist(), [2, 3])
     # With the arguments reversed, the index arrays come back reversed.
     rev_idx_b, rev_idx_a = np_ext.index_intersect(chars_b, chars_a)
     for actual, expected in ((rev_idx_a, idx_a), (rev_idx_b, idx_b)):
         self.assertTrue(np.array_equal(actual, expected))
 def test_performance(self):
   # Time index_intersect on two arrays of about one million fixed-width
   # string ids and report the elapsed wall-clock time.
   size = 1000000
   # Floor division keeps offset an int (xrange rejects floats) even if
   # true division is enabled for the module.
   offset = size // 10000
   a1 = np.array(['V%07d' % i for i in xrange(offset, size)])
   a2 = np.array(['V%07d' % i for i in xrange(offset + size)])
   t0 = time.time()
   i1, i2 = np_ext.index_intersect(a1, a2)
   elapsed = time.time() - t0
   # Parenthesised single-argument form prints the same text under the
   # Python 2 print statement.
   print("")
   print("finished in %.1f s" % elapsed)
   # Every id in a1 occurs exactly once in a2, so all of a1 must match.
   self.assertEqual(len(i1), len(i2))
   self.assertEqual(len(i1), size - offset)
 def test_performance(self):
     # Time index_intersect on two arrays of about one million
     # fixed-width string ids and report the elapsed wall-clock time.
     size = 1000000
     # Floor division keeps offset an int (xrange rejects floats) even
     # if true division is enabled for the module.
     offset = size // 10000
     a1 = np.array(['V%07d' % i for i in xrange(offset, size)])
     a2 = np.array(['V%07d' % i for i in xrange(offset + size)])
     t0 = time.time()
     i1, i2 = np_ext.index_intersect(a1, a2)
     elapsed = time.time() - t0
     # Parenthesised single-argument form prints the same text under
     # the Python 2 print statement.
     print("")
     print("finished in %.1f s" % elapsed)
     # Every id in a1 occurs exactly once in a2, so all of a1 must match.
     self.assertEqual(len(i1), len(i2))
     self.assertEqual(len(i1), size - offset)
 def test_record_array(self):
   # Intersect on single fields of two record arrays, then use the
   # returned indices to pull the matching full records from each side.
   marker_rows = [('V002', 0), ('V068', 1), ('V129', 0)]
   marker_dtype = [('marker_vid', '|S34'), ('allele_flip', '|i1')]
   markers = np.array(marker_rows, dtype=marker_dtype)
   definition_dtype = [('vid', '|S34'), ('label', '|S48')]
   definitions = np.array(
     [('V%03d' % n, 'foo-%03d' % n) for n in xrange(1000)],
     dtype=definition_dtype
   )
   marker_idx, definition_idx = np_ext.index_intersect(
     markers['marker_vid'], definitions['vid']
   )
   self.assertEqual(marker_idx.tolist(), [0, 1, 2])
   self.assertEqual(definition_idx.tolist(), [2, 68, 129])
   # All three markers are matched, so the selection equals the input rows.
   self.assertEqual(markers[marker_idx].tolist(), marker_rows)
   expected_definitions = [
     ('V002', 'foo-002'), ('V068', 'foo-068'), ('V129', 'foo-129')
   ]
   self.assertEqual(definitions[definition_idx].tolist(), expected_definitions)
 def test_record_array(self):
     # Intersect on single fields of two record arrays, then check that
     # indexing with the results selects the matching full records.
     vid_flip_dtype = [('marker_vid', '|S34'), ('allele_flip', '|i1')]
     vid_label_dtype = [('vid', '|S34'), ('label', '|S48')]
     expected_flips = [('V002', 0), ('V068', 1), ('V129', 0)]
     expected_labels = [('V002', 'foo-002'),
                        ('V068', 'foo-068'),
                        ('V129', 'foo-129')]
     flips = np.array(expected_flips, dtype=vid_flip_dtype)
     labels = np.array(
         [('V%03d' % k, 'foo-%03d' % k) for k in xrange(1000)],
         dtype=vid_label_dtype)
     flip_idx, label_idx = np_ext.index_intersect(flips['marker_vid'],
                                                  labels['vid'])
     self.assertEqual(flip_idx.tolist(), [0, 1, 2])
     self.assertEqual(label_idx.tolist(), [2, 68, 129])
     self.assertEqual(flips[flip_idx].tolist(), expected_flips)
     self.assertEqual(labels[label_idx].tolist(), expected_labels)
  def load_markers(self, batch_size=1000, additional_fields=None):
    """
    Read marker info from the marker set table and store it in the
    markers attribute.

    If additional_fields is provided, it must be a list of fields from
    the marker definition table; in this case, the additional info is
    stored in the add_marker_info attribute, one record per marker and
    in the same order as the markers. The "vid" field is always fetched
    because it is needed to match definitions to markers; the caller's
    list is left unmodified.
    """
    data = self.proxy.gadpt.read_snp_markers_set(self.id, batch_size=batch_size)
    self.__set_markers(data)
    if additional_fields is None:
      return
    # Work on a copy so that adding "vid" does not leak into the caller's list.
    col_names = list(additional_fields)
    if "vid" not in col_names:
      col_names.append("vid")
    recs = self.proxy.get_snp_marker_definitions(col_names=col_names,
                                                 batch_size=batch_size)
    # Keep only the definitions whose vid occurs in this marker set.
    _, i2 = np_ext.index_intersect(data['marker_vid'], recs['vid'])
    recs = recs[i2]
    # FIXME: this is not very efficient
    # Re-order the definitions so that they line up with data, row by row.
    by_vid = dict((r['vid'], r) for r in recs)
    recs = np.array([by_vid[d['marker_vid']] for d in data], dtype=recs.dtype)
    self.__set_add_marker_info(recs)
    def intersect(mset1, mset2):
        """
        Find the markers that two marker sets share by aligned position.

        Returns a pair of equal length numpy arrays where corresponding
        array elements are the indices of markers, respectively in mset1
        and mset2, that align to the same position on the same ref_genome.
        The result is computed by np_ext.index_intersect on the
        'global_pos' column of each set's alignments.

        .. code-block:: python

          ref_genome = 'hg19'
          ms1.load_alignments(ref_genome)
          ms2.load_alignments(ref_genome)
          idx1, idx2 = kb.SNPMarkersSet.intersect(ms1, ms2)
          for i1, i2 in it.izip(idx1, idx2):
            assert ms1[i1].position == ms2[i2].position

        Raises ValueError if either set has no alignments loaded, or if
        the two sets are aligned to different reference genomes.
        """
        # Checked in argument order; the first unaligned set stops the scan.
        for mset in (mset1, mset2):
            if not mset.has_aligns():
                raise ValueError('both mset should be aligned')
        if mset1.ref_genome != mset2.ref_genome:
            raise ValueError('msets should be aligned to the same ref_genome')
        return np_ext.index_intersect(mset1.aligns['global_pos'],
                                      mset2.aligns['global_pos'])
  def intersect(mset1, mset2):
    """
    Match the markers of two marker sets by aligned position.

    Returns a pair of equal length numpy arrays where corresponding
    array elements are the indices of markers, respectively in mset1
    and mset2, that align to the same position on the same ref_genome.
    The pair is the result of np_ext.index_intersect applied to the
    'global_pos' column of each set's alignments.

    .. code-block:: python

      ref_genome = 'hg19'
      ms1.load_alignments(ref_genome)
      ms2.load_alignments(ref_genome)
      idx1, idx2 = kb.SNPMarkersSet.intersect(ms1, ms2)
      for i1, i2 in it.izip(idx1, idx2):
        assert ms1[i1].position == ms2[i2].position

    Raises ValueError if either set has no alignments loaded, or if
    the two sets are aligned to different reference genomes.
    """
    if not mset1.has_aligns() or not mset2.has_aligns():
      raise ValueError('both mset should be aligned')
    if mset1.ref_genome != mset2.ref_genome:
      raise ValueError('msets should be aligned to the same ref_genome')
    return np_ext.index_intersect(
      mset1.aligns['global_pos'], mset2.aligns['global_pos']
    )
    def load_markers(self, batch_size=1000, additional_fields=None):
        """
        Read marker info from the marker set table and store it in the
        markers attribute.

        If additional_fields is provided, it must be a list of fields from
        the marker definition table; in this case, the additional info is
        stored in the add_marker_info attribute, one record per marker and
        in the same order as the markers. The "vid" field is always
        fetched because it is needed to match definitions to markers; the
        caller's list is left unmodified.
        """
        data = self.proxy.gadpt.read_snp_markers_set(self.id,
                                                     batch_size=batch_size)
        self.__set_markers(data)
        if additional_fields is None:
            return
        # Work on a copy so that adding "vid" does not leak into the
        # caller's list.
        col_names = list(additional_fields)
        if "vid" not in col_names:
            col_names.append("vid")
        recs = self.proxy.get_snp_marker_definitions(
            col_names=col_names, batch_size=batch_size)
        # Keep only the definitions whose vid occurs in this marker set.
        _, i2 = np_ext.index_intersect(data['marker_vid'], recs['vid'])
        recs = recs[i2]
        # FIXME: this is not very efficient
        # Re-order the definitions so that they line up with data, row
        # by row.
        by_vid = dict((r['vid'], r) for r in recs)
        recs = np.array([by_vid[d['marker_vid']] for d in data],
                        dtype=recs.dtype)
        self.__set_add_marker_info(recs)