Exemple #1
0
def index_repeatmasker_alignment_by_id(fh, out_fh, vebrose=False):
    """Build an index for a repeat-masker alignment file by repeat-masker ID.

    :param fh:      the repeat-masker alignment file to index; it is handed
                    to IndexedFile as the file being indexed.
    :param out_fh:  where the finished index is written.
    :param vebrose: forwarded to the index writer as its ``verbose`` flag.
                    The name is a misspelling of "verbose"; it is kept as-is
                    so that existing keyword callers do not break.
    """
    def extract_UID(rm_alignment):
        # the index key for an alignment is the repeat-masker ID held in its
        # meta-data
        return rm_alignment.meta[multipleAlignment.RM_ID_KEY]

    index = IndexedFile(fh, repeat_masker_alignment_iterator, extract_UID)
    # pass the flag through; previously it was accepted but silently dropped
    index.write_index(out_fh, verbose=vebrose)
Exemple #2
0
def index_genome_alignment_by_locus(fh, out_fh, verbose=False):
  """Index a genome alignment, keyed on coordinates in the reference genome.

  :param fh:      the genome alignment file to index.
  :param out_fh:  where the finished index is written.
  :param verbose: forwarded to the index writer.
  """
  # the reference species is fixed to hg19 and blocks are read in the
  # iterator's index-friendly mode
  block_reader = functools.partial(genome_alignment_iterator,
                                   index_friendly=True,
                                   reference_species="hg19")
  locus_index = IndexedFile(fh, block_reader,
                            JustInTimeGenomeAlignmentBlock.build_hash)
  locus_index.write_index(out_fh, verbose=verbose)
Exemple #3
0
    def test_index_lookup(self, mock_open):
        """Run a 'lookup' for one key through main() and check what it wrote."""
        maf_strm = StringIO.StringIO(self.ga_maf1)
        index_strm = StringIO.StringIO()
        result_strm = StringIO.StringIO()

        # stand-in for open(): each known filename maps to an in-memory stream
        fake_files = {
            "one.maf": maf_strm,
            "one.idx": index_strm,
            "out.txt": result_strm,
        }

        def open_side_effect(*args, **kwargs):
            name = args[0]
            if not isinstance(name, basestring):
                raise TypeError()
            if name in fake_files:
                return fake_files[name]
            raise IOError("No such file")

        mock_open.side_effect = open_side_effect

        # write the index into index_strm, then rewind it so it can be read
        block_reader = functools.partial(genome_alignment_iterator,
                                         reference_species="hg19")
        idx = IndexedFile(StringIO.StringIO(self.ga_maf1), block_reader,
                          JustInTimeGenomeAlignmentBlock.build_hash)
        idx.write_index(index_strm)
        index_strm.seek(0)

        key = "\t".join(("chr22", "1772", "1825"))
        main(["lookup", "-o", "out.txt", "-k", key, "one.maf", "one.idx"])

        # what the UI wrote must match fetching the same key from the index
        expected = str(idx[key]).strip()
        observed = str(result_strm.getvalue()).strip()
        self.assertEqual(expected, observed)
Exemple #4
0
def index_repeatmasker_alignment_by_id(fh, out_fh, vebrose=False):
  """Build an index for a repeat-masker alignment file by repeat-masker ID.

  :param fh:      the repeat-masker alignment file to index; it is handed to
                  IndexedFile as the file being indexed.
  :param out_fh:  where the finished index is written.
  :param vebrose: forwarded to the index writer as its ``verbose`` flag. The
                  name is a misspelling of "verbose"; it is kept as-is so
                  that existing keyword callers do not break.
  """
  def extract_UID(rm_alignment):
    # the index key for an alignment is the repeat-masker ID held in its
    # meta-data
    return rm_alignment.meta[multipleAlignment.RM_ID_KEY]

  index = IndexedFile(fh, repeat_masker_alignment_iterator, extract_UID)
  # pass the flag through; previously it was accepted but silently dropped
  index.write_index(out_fh, verbose=vebrose)
Exemple #5
0
  def test_index_lookup(self, mock_open):
    """Run a 'lookup' for one key through main() and check what it wrote."""
    maf_strm = StringIO.StringIO(self.ga_maf1)
    index_strm = StringIO.StringIO()
    result_strm = StringIO.StringIO()

    # stand-in for open(): each known filename maps to an in-memory stream
    fake_files = {
      "one.maf": maf_strm,
      "one.idx": index_strm,
      "out.txt": result_strm,
    }

    def open_side_effect(*args, **kwargs):
      name = args[0]
      if not isinstance(name, basestring):
        raise TypeError()
      if name in fake_files:
        return fake_files[name]
      raise IOError("No such file")

    mock_open.side_effect = open_side_effect

    # write the index into index_strm, then rewind it so it can be read
    block_reader = functools.partial(genome_alignment_iterator,
                                     reference_species="hg19")
    idx = IndexedFile(StringIO.StringIO(self.ga_maf1), block_reader,
                      JustInTimeGenomeAlignmentBlock.build_hash)
    idx.write_index(index_strm)
    index_strm.seek(0)

    key = "\t".join(("chr22", "1772", "1825"))
    main(["lookup", "-o", "out.txt", "-k", key, "one.maf", "one.idx"])

    # what the UI wrote must match fetching the same key from the index
    expected = str(idx[key]).strip()
    observed = str(result_strm.getvalue()).strip()
    self.assertEqual(expected, observed)
Exemple #6
0
def lookup_genome_alignment_index(index_fh, indexed_fh, out_fh=sys.stdout,
                                  key=None, verbose=False):
  """Load a GA index and its indexed file and extract one or more blocks.

  :param index_fh:   the index file to load. Can be a filename or a
                     stream-like object.
  :param indexed_fh: the file that the index was built for,
  :param out_fh:     stream that each looked-up block is written to, each
                     followed by a newline.
  :param key:        A single key, iterable of keys, or None. This key will be
                     used for lookup. If None, user is prompted to enter keys
                     interactively.
  :param verbose:    currently unused by this function.
  """
  # load the genome alignment as a JIT object
  bound_iter = functools.partial(genome_alignment_iterator,
                                 reference_species="hg19", index_friendly=True)
  hash_func = JustInTimeGenomeAlignmentBlock.build_hash
  idx = IndexedFile(record_iterator=bound_iter, record_hash_function=hash_func)
  idx.read_index(index_fh, indexed_fh)

  if key is None:
    # interactive mode: keep reading keys until an empty line is entered
    while key is None or key.strip() != "":
      sys.stderr.write("[WAITING FOR KEY ENTRY ON STDIN; " +
                       "END WITH EMPTY LINE]\n")
      key = raw_input()
      # we know keys for genome alignments have tabs as delims, so..
      key = '\t'.join(key.split()).strip()
      if key != "":
        out_fh.write(str(idx[key]) + "\n")
      sys.stderr.write("\n")
  else:
    # a bare string is one key; anything else is taken to be an iterable of
    # keys (previously a non-string here failed on key.split())
    keys = [key] if isinstance(key, basestring) else key
    for single_key in keys:
      # we know keys for genome alignments have tabs as delims, so..
      single_key = '\t'.join(single_key.split())
      out_fh.write(str(idx[single_key]) + "\n")
Exemple #7
0
def build_genome_alignment_from_file(ga_path, ref_spec, idx_path=None,
                                     verbose=False):
  """
  Load a single MAF file as a genome alignment.

  :param ga_path:  the path to the file to load.
  :param ref_spec: which species in the MAF file is the reference?
  :param idx_path: if provided, use this index to generate a just-in-time
                   genome alignment, instead of loading the file immediately.
  :param verbose:  passed on to the block iterator, the index reader and the
                   resulting GenomeAlignment; when set, progress is also
                   shown while just-in-time blocks are created.
  """
  if idx_path is None:
    # no index given: read every block from the file right away
    eager_blocks = list(genome_alignment_iterator(ga_path, ref_spec,
                                                  verbose=verbose))
    return GenomeAlignment(eager_blocks, verbose)

  block_reader = functools.partial(genome_alignment_iterator,
                                   reference_species=ref_spec)
  factory = IndexedFile(None, block_reader,
                        JustInTimeGenomeAlignmentBlock.build_hash)
  factory.read_index(idx_path, ga_path, verbose=verbose)

  jit_blocks = []
  progress = None
  for block_key in factory:
    if verbose:
      # the indicator is only created once the first key has been seen
      if progress is None:
        progress = ProgressIndicator(totalToDo=len(factory),
                                     messagePrefix="completed",
                                     messageSuffix="building alignment blocks ")
      progress.done += 1
      progress.showProgress()
    jit_blocks.append(JustInTimeGenomeAlignmentBlock(factory, block_key))
  return GenomeAlignment(jit_blocks, verbose)
Exemple #8
0
def index_genome_alignment_by_locus(fh, out_fh, verbose=False):
    """Index a genome alignment, keyed on coordinates in the reference genome.

    :param fh:      the genome alignment file to index.
    :param out_fh:  where the finished index is written.
    :param verbose: forwarded to the index writer.
    """
    # the reference species is fixed to hg19 and blocks are read in the
    # iterator's index-friendly mode
    block_reader = functools.partial(genome_alignment_iterator,
                                     index_friendly=True,
                                     reference_species="hg19")
    locus_index = IndexedFile(fh, block_reader,
                              JustInTimeGenomeAlignmentBlock.build_hash)
    locus_index.write_index(out_fh, verbose=verbose)
Exemple #9
0
def _build_index(in_strng, ref_spec):
  """Index the genome alignment text in in_strng; return the index stream.

  :param in_strng: the genome alignment, as a string.
  :param ref_spec: the reference species handed to the alignment iterator.
  :return: a StringIO holding the written index, positioned at its start.
  """
  block_reader = functools.partial(genome_alignment_iterator,
                                   reference_species=ref_spec)
  source_strm = StringIO.StringIO(in_strng)
  built = IndexedFile(source_strm, block_reader,
                      JustInTimeGenomeAlignmentBlock.build_hash)
  index_out = StringIO.StringIO()
  built.write_index(index_out)
  index_out.seek(0)  # rewind so callers read from the beginning
  return index_out
Exemple #10
0
def _build_index(in_strng, ref_spec):
    """Index the genome alignment text in in_strng; return the index stream.

    :param in_strng: the genome alignment, as a string.
    :param ref_spec: the reference species handed to the alignment iterator.
    :return: a StringIO holding the written index, positioned at its start.
    """
    block_reader = functools.partial(genome_alignment_iterator,
                                     reference_species=ref_spec)
    source_strm = StringIO.StringIO(in_strng)
    built = IndexedFile(source_strm, block_reader,
                        JustInTimeGenomeAlignmentBlock.build_hash)
    index_out = StringIO.StringIO()
    built.write_index(index_out)
    index_out.seek(0)  # rewind so callers read from the beginning
    return index_out
Exemple #11
0
def _build_index(maf_strm, ref_spec):
    """Index a MAF genome alignment stream; return the index as a StringIO.

    :param maf_strm: the MAF genome alignment to index.
    :param ref_spec: the reference species handed to the alignment iterator.
    :return: a StringIO holding the written index, positioned at its start.
    """
    block_reader = functools.partial(genome_alignment_iterator,
                                     reference_species=ref_spec)
    maf_index = IndexedFile(maf_strm, block_reader,
                            JustInTimeGenomeAlignmentBlock.build_hash)
    index_out = StringIO.StringIO()
    maf_index.write_index(index_out)
    index_out.seek(0)  # rewind so callers read from the beginning
    return index_out
Exemple #12
0
def _build_index(maf_strm, ref_spec):
  """Index a MAF genome alignment stream; return the index as a StringIO.

  :param maf_strm: the MAF genome alignment to index.
  :param ref_spec: the reference species handed to the alignment iterator.
  :return: a StringIO holding the written index, positioned at its start.
  """
  block_reader = functools.partial(genome_alignment_iterator,
                                   reference_species=ref_spec)
  maf_index = IndexedFile(maf_strm, block_reader,
                          JustInTimeGenomeAlignmentBlock.build_hash)
  index_out = StringIO.StringIO()
  maf_index.write_index(index_out)
  index_out.seek(0)  # rewind so callers read from the beginning
  return index_out
Exemple #13
0
def lookup_genome_alignment_index(index_fh,
                                  indexed_fh,
                                  out_fh=sys.stdout,
                                  key=None,
                                  verbose=False):
    """Load a GA index and its indexed file and extract one or more blocks.

    :param index_fh:   the index file to load. Can be a filename or a
                       stream-like object.
    :param indexed_fh: the file that the index was built for,
    :param out_fh:     stream that each looked-up block is written to, each
                       followed by a newline.
    :param key:        A single key, iterable of keys, or None. This key will
                       be used for lookup. If None, user is prompted to enter
                       keys interactively.
    :param verbose:    currently unused by this function.
    """
    # load the genome alignment as a JIT object
    bound_iter = functools.partial(genome_alignment_iterator,
                                   reference_species="hg19",
                                   index_friendly=True)
    hash_func = JustInTimeGenomeAlignmentBlock.build_hash
    idx = IndexedFile(record_iterator=bound_iter,
                      record_hash_function=hash_func)
    idx.read_index(index_fh, indexed_fh)

    if key is None:
        # interactive mode: keep reading keys until an empty line is entered
        while key is None or key.strip() != "":
            sys.stderr.write("[WAITING FOR KEY ENTRY ON STDIN; " +
                             "END WITH EMPTY LINE]\n")
            key = raw_input()
            # we know keys for genome alignments have tabs as delims, so..
            key = '\t'.join(key.split()).strip()
            if key != "":
                out_fh.write(str(idx[key]) + "\n")
            sys.stderr.write("\n")
    else:
        # a bare string is one key; anything else is taken to be an iterable
        # of keys (previously a non-string here failed on key.split())
        keys = [key] if isinstance(key, basestring) else key
        for single_key in keys:
            # we know keys for genome alignments have tabs as delims, so..
            single_key = '\t'.join(single_key.split())
            out_fh.write(str(idx[single_key]) + "\n")
Exemple #14
0
def build_genome_alignment_from_file(ga_path,
                                     ref_spec,
                                     idx_path=None,
                                     verbose=False):
    """
    Load a single MAF file as a genome alignment.

    :param ga_path:  the path to the file to load.
    :param ref_spec: which species in the MAF file is the reference?
    :param idx_path: if provided, use this index to generate a just-in-time
                     genome alignment, instead of loading the file
                     immediately.
    :param verbose:  passed on to the block iterator, the index reader and
                     the resulting GenomeAlignment; when set, progress is
                     also shown while just-in-time blocks are created.
    """
    if idx_path is None:
        # no index given: read every block from the file right away
        eager_blocks = list(
            genome_alignment_iterator(ga_path, ref_spec, verbose=verbose))
        return GenomeAlignment(eager_blocks, verbose)

    block_reader = functools.partial(genome_alignment_iterator,
                                     reference_species=ref_spec)
    factory = IndexedFile(None, block_reader,
                          JustInTimeGenomeAlignmentBlock.build_hash)
    factory.read_index(idx_path, ga_path, verbose=verbose)

    jit_blocks = []
    progress = None
    for block_key in factory:
        if verbose:
            # the indicator is only created once the first key has been seen
            if progress is None:
                progress = ProgressIndicator(
                    totalToDo=len(factory),
                    messagePrefix="completed",
                    messageSuffix="building alignment blocks ")
            progress.done += 1
            progress.showProgress()
        jit_blocks.append(JustInTimeGenomeAlignmentBlock(factory, block_key))
    return GenomeAlignment(jit_blocks, verbose)
Exemple #15
0
    def test_repeat_masker_on_demand_load(self):
        """
        Tests wrapping the alignment iterator in an index and using this index
        to build RM alignment objects that are loaded on-demand from the
        indexed stream.
        """
        from pyokit.io.indexedFile import IndexedFile

        def extract_UID(rm_alignment):
            return rm_alignment.meta[RM_ID_KEY]

        s_io = StringIO.StringIO(self.rm_rc_1_input)
        index = IndexedFile(s_io, repeat_masker_alignment_iterator,
                            extract_UID)

        # each case: record number, number of trailing meta-data lines, the
        # column width and name width used when formatting, and the
        # repeat-masker ID used to fetch the alignment from the index
        cases = [
            (0, 4, 29, None, 5), (1, 4, 30, None, 10), (2, 3, 31, 13, 15),
            (3, 0, 22, 13, 231)
        ]
        for i, trail_meta_size, c_width, m_width, rm_id in cases:
            on_d_alig = JustInTimePairwiseAlignment(index, rm_id)
            on_d_str = _to_repeatmasker_string(on_d_alig,
                                               column_width=c_width,
                                               m_name_width=m_width)

            expected_lines = self.rm_tc1_records[i].split("\n")
            result_lines = on_d_str.split("\n")

            # strip out the last few lines; these should all be there, but
            # order isn't important.
            # NOTE(review): for the case with trail_meta_size == 0 the slice
            # [:-0] is empty and [-0:] is the whole list, so that record is
            # compared only as an unordered set of lines -- confirm whether
            # this is intended before tightening it.
            alig_actual = [
                x for x in map(str.rstrip, expected_lines[:-trail_meta_size])
                if x.strip() != ""
            ]
            meta_actual = map(str.rstrip, expected_lines[-trail_meta_size:])
            alig_result = [
                x for x in map(str.rstrip, result_lines[:-trail_meta_size])
                if x.strip() != ""
            ]
            meta_result = map(str.rstrip, result_lines[-trail_meta_size:])

            # assertEqual replaces the deprecated failUnlessEqual alias
            self.assertEqual(alig_actual, alig_result)
            self.assertEqual(set(meta_actual), set(meta_result))
Exemple #16
0
  def test_iterator_with_alignment_index(self):
    """Iterate annotations with an alignment index attached; check liftover."""
    def rm_id_of(rm_alignment):
      return rm_alignment.meta[repeatmaskerAlignments.RM_ID_KEY]

    alig_strm = StringIO.StringIO(self.rm_rc_1_input)
    index = IndexedFile(alig_strm, repeat_masker_alignment_iterator, rm_id_of)

    elems = list(repeat_masker_iterator(StringIO.StringIO(self.ann),
                                        alignment_index=index))
    self.assertEqual(len(elems), 6)
    for pos, elem in enumerate(elems):
      expected = retrotransposon.from_repeat_masker_string(self.indv_an[pos])
      self.assertEqual(elem, expected)

    # the index supplied alignments, so liftover should be using them; check
    # one element to make sure they were matched up properly
    lifted = elems[0].liftover(GenomicInterval("chr1", 10, 100))
    self.assertEqual(lifted, [(132, 142), (120, 131), (88, 118), (85, 87)])
    # an element that had no alignment is expected to fail on liftover
    self.assertRaises(IndexError, elems[4].liftover,
                      GenomicInterval("chr1", 15200, 15400))