Example #1
from collections import defaultdict

import fragment  # local module providing the fragment class


def filter_fragment_coverage(flist, coverage):

    # count how many fragments cover each SNP position
    cov_counts = defaultdict(int)
    for f in flist:
        for a in f.seq:
            cov_counts[a[0]] += 1

    filtered_flist = []

    # for each fragment
    for f in flist:

        new_seq = []
        for a in f.seq:
            # keep alleles at positions covered by at least `coverage` fragments
            if cov_counts[a[0]] >= coverage:
                new_seq.append(a)

        # if the fragment has at least 2 alleles after filtering, add it to the new list
        if len(new_seq) >= 2:
            filtered_flist.append(
                fragment.fragment(new_seq, f.name, f.first_piece,
                                  f.last_piece))

    return filtered_flist
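
# All of the filter functions in this listing assume the same fragment shape:
# a .seq list of (snp_index, genomic_pos, call, qual) tuples plus .name,
# .first_piece and .last_piece attributes (Example #4 unpacks these tuples).
# A minimal stand-in with made-up data, sufficient to exercise the coverage
# filter above (a sketch, not part of the original module):
class Frag:
    def __init__(self, seq, name, first_piece=True, last_piece=True):
        self.seq = seq
        self.name = name
        self.first_piece = first_piece
        self.last_piece = last_piece

flist = [Frag([(1, 100, '0', 30), (2, 110, '1', 30)], 'r1'),
         Frag([(1, 100, '1', 30), (3, 120, '0', 30)], 'r2')]
# only SNP index 1 is covered twice, so no fragment keeps >= 2 alleles:
assert filter_fragment_coverage(flist, coverage=2) == []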
Example #2
    def read_package(self, package):
        """reads the dom element package and sets internal state according to it"""
        # the standard
        std = self.paper and self.paper.standard or None

        self.name = package.getAttribute('name')
        if package.getAttribute('id'):
            self.id = package.getAttribute('id')
        for name, cls in {
                'atom': atom,
                'group': group,
                'text': textatom,
                'query': queryatom
        }.items():
            for a in dom_extensions.simpleXPathSearch(package, name):
                self.insert_atom(cls(standard=std, package=a, molecule=self))

        self._id_map = [a.id for a in self.atoms]
        for b in dom_extensions.simpleXPathSearch(package, 'bond'):
            bnd = bond(standard=std, package=b, molecule=self)
            self.add_edge(bnd.atom1, bnd.atom2, bnd)
        # template related attributes
        temp = package.getElementsByTagName('template')
        if temp:
            temp = temp[0]
            self.t_atom = Store.id_manager.get_object_with_id(
                temp.getAttribute('atom'))
            if temp.getAttribute('bond_first') and temp.getAttribute(
                    'bond_second'):
                self.t_bond_first = Store.id_manager.get_object_with_id(
                    temp.getAttribute('bond_first'))
                self.t_bond_second = Store.id_manager.get_object_with_id(
                    temp.getAttribute('bond_second'))
            self.next_to_t_atom = self.t_atom.neighbors[0]
        # display form
        df = package.getElementsByTagName('display-form')
        if df:
            df = df[0]
            self.display_form = ''.join([e.toxml() for e in df.childNodes
                                         ]).encode('utf-8')

        # fragments
        for fel in dom_extensions.simpleXPathSearch(package, "fragment"):
            f = fragment()
            try:
                f.read_package(fel)
            except bkchem_exceptions.bkchem_fragment_error:
                pass
            else:
                self.fragments.add(f)

        ud = dom_extensions.getChildrenNamed(package, "user-data")
        if ud:
            self.user_data = [u.cloneNode(True) for u in ud]

        # final check of atom valencies
        for a in self.vertices:
            if isinstance(a, atom):
                a.raise_valency_to_senseful_value()
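
# read_package relies on xml.dom.minidom-style accessors plus BKChem's own
# dom_extensions helpers (not reproduced here). A minimal, self-contained
# illustration of the attribute/element access pattern, with made-up XML:
from xml.dom.minidom import parseString

doc = parseString('<molecule name="benzene" id="m1"><atom id="a1"/></molecule>')
package = doc.documentElement
print(package.getAttribute('name'))  # 'benzene'
print(package.getAttribute('id'))    # 'm1' (empty string when absent)
print([a.getAttribute('id') for a in package.getElementsByTagName('atom')])  # ['a1']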
Example #3
 def create_fragment( self, name, edges, vertices, type="explicit", strict=False):
   if (strict and self.defines_connected_subgraph_e( edges)) or not strict:
     nf = fragment( Store.id_manager.generate_id( "frag"), name=name, type=type)
     nf.edges = set( edges)
     nf.vertices = set( vertices)
     self.fragments.add( nf)
     return nf
   else:
     return None
Example #4
import fragment  # local module providing the fragment class


def split_fragment_hets(frag, min_het_count=3, max_het_frac=0.25):

    # SNP positions at which to split (helper defined elsewhere in the module)
    breakpos = get_het_breakpoints(frag, min_het_count, max_het_frac)

    new_fragment_pieces = []
    new_piece = []
    split_occurred = False
    for pos, gpos, call, qual in frag.seq:
        if pos in breakpos:
            split_occurred = True
            if len(new_piece) >= 2:
                new_fragment_pieces.append(new_piece)
            new_piece = []
        else:
            if call != 'M':  # drop 'M' calls
                new_piece.append((pos, gpos, call, qual))

    # flush the final piece
    if len(new_piece) >= 2:
        new_fragment_pieces.append(new_piece)

    if not new_fragment_pieces:
        return []

    new_fragment_piece_objects = []
    if split_occurred:
        for i, piece in enumerate(new_fragment_pieces):
            new_name = '{}:H{}'.format(frag.name, i + 1)
            first_piece = False
            last_piece = False
            if i == 0 and frag.first_piece:
                first_piece = True
            if i == len(new_fragment_pieces) - 1 and frag.last_piece:
                last_piece = True
            new_fragment_piece_objects.append(
                fragment.fragment(piece, new_name, first_piece, last_piece))
    else:
        new_fragment_piece_objects.append(
            fragment.fragment(new_fragment_pieces[0], frag.name,
                              frag.first_piece, frag.last_piece))
    return new_fragment_piece_objects
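
# The naming and flag bookkeeping above, isolated for clarity. This is a
# hypothetical helper, not part of the original module: when a fragment is
# split into N pieces, piece i is named "<parent>:H<i+1>" and inherits the
# first_piece/last_piece flags only at the outer ends.
def piece_metadata(n_pieces, parent_name, parent_first, parent_last):
    meta = []
    for i in range(n_pieces):
        name = '{}:H{}'.format(parent_name, i + 1)
        first = (i == 0) and parent_first
        last = (i == n_pieces - 1) and parent_last
        meta.append((name, first, last))
    return meta

assert piece_metadata(2, 'read7', True, True) == [
    ('read7:H1', True, False), ('read7:H2', False, True)]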
Example #5
 def create_fragment(self,
                     name,
                     edges,
                     vertices,
                     type="explicit",
                     strict=False):
     if (strict and self.defines_connected_subgraph_e(edges)) or not strict:
         nf = fragment(Store.id_manager.generate_id("frag"),
                       name=name,
                       type=type)
         nf.edges = set(edges)
         nf.vertices = set(vertices)
         self.fragments.add(nf)
         return nf
     else:
         return None
Example #6
  def read_package( self, package):
    """reads the dom element package and sets internal state according to it"""
    # the standard
    std = self.paper and self.paper.standard or None

    self.name = package.getAttribute( 'name')
    if package.getAttribute( 'id'):
      self.id = package.getAttribute( 'id')
    for name, cls in {'atom': atom, 'group': group, 'text': textatom, 'query': queryatom}.items():
      for a in dom_extensions.simpleXPathSearch( package, name):
        self.insert_atom( cls( standard=std, package=a, molecule=self))

    self._id_map = [a.id for a in self.atoms]
    for b in dom_extensions.simpleXPathSearch( package, 'bond'):
      bnd = bond( standard=std, package=b, molecule=self)
      self.add_edge( bnd.atom1, bnd.atom2, bnd)
    # template related attributes
    temp = package.getElementsByTagName('template')
    if temp:
      temp = temp[0]
      self.t_atom = Store.id_manager.get_object_with_id( temp.getAttribute( 'atom'))
      if temp.getAttribute('bond_first') and temp.getAttribute('bond_second'):
        self.t_bond_first = Store.id_manager.get_object_with_id( temp.getAttribute( 'bond_first'))
        self.t_bond_second = Store.id_manager.get_object_with_id( temp.getAttribute( 'bond_second'))
      self.next_to_t_atom = self.t_atom.neighbors[0]
    # display form
    df = package.getElementsByTagName('display-form')
    if df:
      df = df[0]
      self.display_form = ''.join( [e.toxml() for e in df.childNodes]).encode('utf-8')

    # fragments
    for fel in dom_extensions.simpleXPathSearch( package, "fragment"):
      f = fragment()
      try:
        f.read_package( fel)
      except bkchem_exceptions.bkchem_fragment_error:
        pass
      else:
        self.fragments.add( f)

    ud = dom_extensions.getChildrenNamed( package, "user-data")
    if ud:
      self.user_data = [u.cloneNode( True) for u in ud]

    # final check of atom valencies
    for a in self.vertices:
      if isinstance( a, atom):
        a.raise_valency_to_senseful_value()
Example #7
import sys

import fragment  # local module providing the fragment class


def filter_on_VCF(flist, vcf_filter):

    vcf_set = set()
    file_chrom = None  # the whole VCF file should be on this chromosome
    with open(vcf_filter, 'r') as infile:
        for line in infile:
            if line[:1] == '#':
                continue
            el = line.strip().split('\t')
            if len(el) < 5:
                continue

            vcf_chrom = el[0]
            if file_chrom is None:
                file_chrom = vcf_chrom
            elif vcf_chrom != file_chrom:
                print("ERROR, multi-chromosomal VCF.")
                sys.exit(1)

            genomic_pos = int(el[1]) - 1  # VCF positions are 1-based
            vcf_set.add(genomic_pos)

    filtered_flist = []

    # for each fragment
    for f in flist:

        new_seq = []
        for a in f.seq:
            # keep alleles whose genomic position appears in the VCF
            if a[1] in vcf_set:
                new_seq.append(a)

        # if the fragment has at least 2 alleles after filtering, add it to the new list
        if len(new_seq) >= 2:
            filtered_flist.append(
                fragment.fragment(new_seq, f.name, f.first_piece,
                                  f.last_piece))

    return filtered_flist
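
# VCF coordinates are 1-based; the loop above stores 0-based positions so they
# can be compared against each allele's genomic position (a[1]). A one-record
# illustration with a made-up line:
line = 'chr1\t1000\t.\tA\tG\t50\tPASS\t.'
el = line.strip().split('\t')
vcf_chrom, genomic_pos = el[0], int(el[1]) - 1
print(vcf_chrom, genomic_pos)  # chr1 999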
Example #8
def reduce(array, op):
    """Perform a tree-like reduction over all axes of *array*.

    :param array: *pyDive.ndarray*, *pyDive.h5_ndarray* or *pyDive.cloned_ndarray* to be reduced
    :param numpy-ufunc op: reduce operation, e.g. *numpy.add*.

    If the hdf5 data exceeds the memory limit (currently 25% of the combined main memory of all cluster nodes)\
    the data will be read block-wise so that a block fits into memory.
    """
    def reduce_wrapper(array_name, op_name):
        array = globals()[array_name]
        op = eval("np." + op_name)
        return algorithm.__tree_reduce(array, axis=None, op=op) # reduction over all axes

    view = com.getView()

    tmp_targets = view.targets # save current target list
    if type(array) == VirtualArrayOfStructs:
        view.targets = array.firstArray.target_ranks
    else:
        view.targets = array.target_ranks

    result = None

    if (hasattr(array, "arraytype") and array.arraytype in hdd_arraytypes) or type(array) in hdd_arraytypes:
        for chunk in fragment(array):
            array_name = repr(chunk)

            targets_results = view.apply(interactive(reduce_wrapper), array_name, op.__name__)
            chunk_result = op.reduce(targets_results) # reduce over targets' results

            if result is None:
                result = chunk_result
            else:
                result = op(result, chunk_result)
    else:
        array_name = repr(array)

        targets_results = view.apply(interactive(reduce_wrapper), array_name, op.__name__)
        result = op.reduce(targets_results) # reduce over targets' results

    view.targets = tmp_targets # restore target list
    return result
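
# algorithm.__tree_reduce is internal to pyDive; the sketch below only
# illustrates what a tree-like (pairwise) reduction means, assuming op is an
# associative numpy ufunc such as np.add:
import numpy as np

def tree_reduce(values, op=np.add):
    values = list(values)
    while len(values) > 1:
        paired = [op(values[i], values[i + 1])
                  for i in range(0, len(values) - 1, 2)]
        if len(values) % 2:  # an odd element carries over to the next round
            paired.append(values[-1])
        values = paired
    return values[0]

assert tree_reduce([1, 2, 3, 4, 5]) == 15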
Example #9
def filter_het_positions(flist):

    filtered_flist = []

    # for each fragment
    for f in flist:

        new_seq = []
        for a in f.seq:
            # keep only calls that are not 'M' (i.e., heterozygous positions)
            if a[2] != 'M':
                new_seq.append(a)

        # if the fragment has at least 2 alleles after filtering, add it to the new list
        if len(new_seq) >= 2:
            filtered_flist.append(
                fragment.fragment(new_seq, f.name, f.first_piece,
                                  f.last_piece))

    return filtered_flist
Example #10
import socket
import struct

import pycom

import fragment  # local SCHC fragmentation module
# _SCHC_DEFRAG_NOTYET is assumed to be defined in the same local module.
from fragment import _SCHC_DEFRAG_NOTYET

s = socket.socket(socket.AF_LORA, socket.SOCK_RAW)
s.setsockopt(socket.SOL_LORA, socket.SO_DR, 5)
s.setsockopt(socket.SOL_LORA, socket.SO_CONFIRMED, False)

pycom.heartbeat(False)

# create a message for the trigger.
message = "Hello LoRa"
fmt = ">%ds" % len(message)
buf = struct.pack(fmt, message.encode())  # struct's "s" format takes bytes
l2_size = len(message)  # must be set for each message sent.
s.send(buf)

# fragment instance
# XXX rule_id and dtag are zero for the ietf100 testing.
fg = fragment.fragment(buf, 0, 0, window_size=1)

dfg = fragment.defragment_factory()

while True:
    pycom.rgbled(0xFF0000)
    s.setblocking(True)
    s.settimeout(10)

    # wait for something from the server
    try:
        rx_data = s.recv(64)
        print("received:", rx_data)
        # try to defragment
        ret, buf = dfg.defrag(rx_data)
        if ret == _SCHC_DEFRAG_NOTYET:
            # assumed handling: more fragments expected, keep listening
            continue
    except Exception:
        # assumed handling: a receive timeout just restarts the loop
        continue
Example #11
def map(f, *arrays, **kwargs):
    """Applies *f* on :term:`engine` on local arrays related to *arrays*.
    Example: ::

        cluster_array = pyDive.ones(shape=[100], distaxes=0)

        cluster_array *= 2.0
        # equivalent to
        pyDive.map(lambda a: np.multiply(a, 2.0, out=a), cluster_array) # in-place multiply; a is the local numpy-array of *cluster_array*

    Or, as a decorator: ::

        @pyDive.map
        def twice(a):
            a *= 2.0

        twice(cluster_array)

    :param callable f: function to be called on :term:`engine`. Has to accept *numpy-arrays* and *kwargs*
    :param arrays: list of arrays including *pyDive.ndarrays*, *pyDive.h5_ndarrays* or *pyDive.cloned_ndarrays*
    :param kwargs: user-specified keyword arguments passed to *f*
    :raises AssertionError: if the *shapes* of *pyDive.ndarrays* and *pyDive.h5_ndarrays* do not match
    :raises AssertionError: if the *distaxes* attributes of *pyDive.ndarrays* and *pyDive.h5_ndarrays* do not match

    Notes:
        - If the hdf5 data exceeds the memory limit (currently 25% of the combined main memory of all cluster nodes)\
            the data will be read block-wise so that a block fits into memory.
        - *map* chooses the list of *engines* from the **first** element of *arrays*. On these engines *f* is called.\
            If the first array is a *pyDive.h5_ndarray* all engines will be used.
        - *map* does not write data back to a *pyDive.h5_ndarray* yet.
        - *map* does not equalize the element distribution of *pyDive.ndarrays* before execution.
    """
    if not arrays:
        # decorator mode
        def map_deco(*arrays, **kwargs):
            map(f, *arrays, **kwargs)
        return map_deco

    def map_wrapper(f, array_names, **kwargs):
        arrays = [globals()[array_name] for array_name in array_names]
        f(*arrays, **kwargs)

    view = com.getView()

    tmp_targets = view.targets # save current target list
    if type(arrays[0]) == VirtualArrayOfStructs:
        view.targets = arrays[0].firstArray.target_ranks
    else:
        view.targets = arrays[0].target_ranks

    hdd_arrays = [a for a in arrays if (hasattr(a, "arraytype") and a.arraytype in hdd_arraytypes) or type(a) in hdd_arraytypes]
    if hdd_arrays:
        cloned_arrays = [a for a in arrays if (hasattr(a, "arraytype") and a.arraytype is cloned_ndarray) or type(a) is cloned_ndarray]
        other_arrays = [a for a in arrays if not ((hasattr(a, "arraytype") and a.arraytype is cloned_ndarray) or type(a) is cloned_ndarray)]

        cloned_arrays_ids = [id(a) for a in cloned_arrays]
        other_arrays_ids = [id(a) for a in other_arrays]

        for fragments in fragment(*other_arrays):
            it_other_arrays = iter(other_arrays)
            it_cloned_arrays = iter(cloned_arrays)

            array_names = []
            for a in arrays:
                if id(a) in cloned_arrays_ids:
                    array_names.append(repr(next(it_cloned_arrays)))
                    continue
                if id(a) in other_arrays_ids:
                    array_names.append(repr(next(it_other_arrays)))
                    continue

            view.apply(interactive(map_wrapper), interactive(f), array_names, **kwargs)
    else:
        array_names = [repr(a) for a in arrays]
        view.apply(interactive(map_wrapper), interactive(f), array_names, **kwargs)

    view.targets = tmp_targets # restore target list
Example #12
def mapReduce(map_func, reduce_op, *arrays, **kwargs):
    """Applies *map_func* on :term:`engine` on local arrays related to *arrays*
    and reduces its result in a tree-like fashion over all axes.
    Example: ::

        cluster_array = pyDive.ones(shape=[100], distaxes=0)

        s = pyDive.mapReduce(lambda a: a**2, np.add, cluster_array) # a is the local numpy-array of *cluster_array*
        assert s == 100

    :param callable map_func: function to be called on :term:`engine`. Has to accept *numpy-arrays* and *kwargs*
    :param numpy-ufunc reduce_op: reduce operation, e.g. *numpy.add*.
    :param arrays: list of arrays including *pyDive.ndarrays*, *pyDive.h5_ndarrays* or *pyDive.cloned_ndarrays*
    :param kwargs: user-specified keyword arguments passed to *map_func*
    :raises AssertionError: if the *shapes* of *pyDive.ndarrays* and *pyDive.h5_ndarrays* do not match
    :raises AssertionError: if the *distaxes* attributes of *pyDive.ndarrays* and *pyDive.h5_ndarrays* do not match

    Notes:
        - If the hdf5 data exceeds the memory limit (currently 25% of the combined main memory of all cluster nodes)\
            the data will be read block-wise so that a block fits into memory.
        - *mapReduce* chooses the list of *engines* from the **first** element of *arrays*. On these engines the mapReduce will be executed.\
            If the first array is a *pyDive.h5_ndarray* all engines will be used.
        - *mapReduce* does not write data back to a *pyDive.h5_ndarray* yet.
        - *mapReduce* does not equalize the element distribution of *pyDive.ndarrays* before execution.
    """
    def mapReduce_wrapper(map_func, reduce_op_name, array_names, **kwargs):
        arrays = [globals()[array_name] for array_name in array_names]
        reduce_op = eval("np." + reduce_op_name)
        return algorithm.__tree_reduce(map_func(*arrays, **kwargs), axis=None, op=reduce_op)

    view = com.getView()
    tmp_targets = view.targets # save current target list
    if type(arrays[0]) == VirtualArrayOfStructs:
        view.targets = arrays[0].firstArray.target_ranks
    else:
        view.targets = arrays[0].target_ranks

    result = None

    hdd_arrays = [a for a in arrays if (hasattr(a, "arraytype") and a.arraytype in hdd_arraytypes) or type(a) in hdd_arraytypes]
    if hdd_arrays:
        cloned_arrays = [a for a in arrays if (hasattr(a, "arraytype") and a.arraytype is cloned_ndarray) or type(a) is cloned_ndarray]
        other_arrays = [a for a in arrays if not ((hasattr(a, "arraytype") and a.arraytype is cloned_ndarray) or type(a) is cloned_ndarray)]

        cloned_arrays_ids = [id(a) for a in cloned_arrays]
        other_arrays_ids = [id(a) for a in other_arrays]

        for fragments in fragment(*other_arrays):
            it_other_arrays = iter(other_arrays)
            it_cloned_arrays = iter(cloned_arrays)

            array_names = []
            for a in arrays:
                if id(a) in cloned_arrays_ids:
                    array_names.append(repr(next(it_cloned_arrays)))
                    continue
                if id(a) in other_arrays_ids:
                    array_names.append(repr(next(it_other_arrays)))
                    continue

            targets_results = view.apply(interactive(mapReduce_wrapper),\
                interactive(map_func), reduce_op.__name__, array_names, **kwargs)

            fragment_result = reduce_op.reduce(targets_results) # reduce over targets' results
            if result is None:
                result = fragment_result
            else:
                result = reduce_op(result, fragment_result)
    else:
        array_names = [repr(a) for a in arrays]
        targets_results = view.apply(interactive(mapReduce_wrapper),\
            interactive(map_func), reduce_op.__name__, array_names, **kwargs)

        result = reduce_op.reduce(targets_results) # reduce over targets' results

    view.targets = tmp_targets # restore target list

    return result
Example #13
import itertools
from collections import defaultdict

import fragment  # local module providing the fragment class


def fragment_comparison_split(flist, threshold=3):

    # sort fragments by the SNP index of their first allele
    flist = sorted(flist, key=lambda x: x.seq[0][0])

    contam_bounds = defaultdict(set)

    N = len(flist)

    for i in range(N):
        for j in range(i + 1, N):

            f1 = flist[i]
            f2 = flist[j]

            if not overlap(f1, f2):
                continue

            cons, contam_bounds = consistent(f1, f2, i, j, contam_bounds,
                                             threshold)

    break_list = [set() for x in range(N)]

    for i in range(N):

        f1 = flist[i]

        # consider repairing f1.
        # if f1 is inconsistent with multiple reads then we just split f1 where inconsistent.
        # if f1 is inconsistent with one read, then we split both reads at the inconsistent location.

        blist = []
        for j in range(N):
            if (i, j) not in contam_bounds or contam_bounds[(i, j)] == set():
                continue
            blist.append(j)

        for b1, b2 in itertools.combinations(blist, 2):

            common_error = set.intersection(contam_bounds[(i, b1)],
                                            contam_bounds[(i, b2)])

            if common_error != set():
                break_list[i] = break_list[i].union(common_error)
                for bx in blist:
                    inter = set.intersection(common_error,
                                             contam_bounds[(i, bx)])
                    if inter != set():
                        rightward = next(iter(inter))
                        leftward = rightward - 1
                        while rightward in contam_bounds[(i, bx)]:
                            contam_bounds[(i, bx)].remove(rightward)
                            rightward += 1
                        while leftward in contam_bounds[(i, bx)]:
                            contam_bounds[(i, bx)].remove(leftward)
                            leftward -= 1

    new_flist = []

    for i in range(N):

        f1 = flist[i]

        # consider repairing f1.
        for j in range(N):

            # inconsistencies that weren't previously repaired
            if (i, j) in contam_bounds and contam_bounds[(i, j)] != set():
                break_list[i] = set.union(break_list[i], contam_bounds[(i, j)])

        if break_list[i] == set():
            # add unedited fragment to new fragment list
            new_flist.append(f1)
        else:
            # chop up fragment where necessary and add pieces to fragment list
            new_flist_temp = []
            new_seq = []
            name_ctr = 1
            for allele in f1.seq:

                if allele[0] in break_list[i]:
                    if VERBOSE:
                        print("Breaking {} at pos {}".format(
                            f1.name, allele[0]))
                    if len(new_seq) > 1:
                        new_name = "{}:S{}".format(f1.name, name_ctr)
                        new_flist_temp.append(
                            fragment.fragment(new_seq, new_name, False, False))
                        name_ctr += 1

                    new_seq = []

                new_seq.append(allele)

            if len(new_seq) > 1:
                new_name = "{}:S{}".format(f1.name, name_ctr)
                new_flist_temp.append(
                    fragment.fragment(new_seq, new_name, False, False))

            if new_flist_temp:  # guard: all pieces may have been too short to keep
                if f1.first_piece:
                    new_flist_temp[0].first_piece = True
                if f1.last_piece:
                    new_flist_temp[-1].last_piece = True

            new_flist += new_flist_temp

    return new_flist
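
# A standalone sketch of the chopping step above (hypothetical helper; the
# original builds fragment.fragment objects instead of plain tuples). Note
# that the allele at a break position starts the next piece:
def chop(seq, break_positions, name):
    pieces = []
    cur = []
    ctr = 1
    for allele in seq:
        if allele[0] in break_positions:
            if len(cur) > 1:
                pieces.append(('{}:S{}'.format(name, ctr), cur))
                ctr += 1
            cur = []
        cur.append(allele)
    if len(cur) > 1:
        pieces.append(('{}:S{}'.format(name, ctr), cur))
    return pieces

print(chop([(1,), (2,), (5,), (6,)], {5}, 'read7'))
# [('read7:S1', [(1,), (2,)]), ('read7:S2', [(5,), (6,)])]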