def filter_fragment_coverage(flist, coverage):
    """Remove low-coverage alleles and discard fragments that become too short.

    The first element of every allele tuple in ``f.seq`` is used as the
    position key.  The coverage of a key is the number of allele entries,
    summed over all fragments in *flist*, that carry it.

    :param flist: list of fragment objects exposing ``seq``, ``name``,
        ``first_piece`` and ``last_piece``
    :param coverage: minimum number of entries a position needs in order
        to be kept (the comparison is ``>=``)
    :return: list of newly built ``fragment.fragment`` objects; only
        fragments that keep at least two alleles are included
    """
    # first pass: count how often each position key occurs
    depth = defaultdict(int)
    for frag in flist:
        for allele in frag.seq:
            depth[allele[0]] += 1

    # second pass: keep sufficiently covered alleles, rebuild the fragments
    kept = []
    for frag in flist:
        covered = [allele for allele in frag.seq
                   if depth[allele[0]] >= coverage]
        if len(covered) < 2:
            # fewer than two alleles left, so the fragment is dropped
            continue
        kept.append(
            fragment.fragment(covered, frag.name, frag.first_piece,
                              frag.last_piece))
    return kept
def read_package(self, package):
    """reads the dom element package and sets internal state according to it

    In order, this reads: the ``name`` and (if present) ``id`` attributes,
    all ``atom``/``group``/``text``/``query`` children (inserted as
    vertices), all ``bond`` children (added as edges), the optional
    ``template`` and ``display-form`` elements, all ``fragment`` children
    and any ``user-data`` children.  Finally the valency of every vertex
    that is an ``atom`` instance is raised to a sensible value.

    :param package: DOM element describing the molecule
    """
    # the standard
    std = self.paper and self.paper.standard or None

    self.name = package.getAttribute('name')
    if package.getAttribute('id'):
        self.id = package.getAttribute('id')

    # element name -> class used to build the vertex.
    # .items() (rather than the Python-2-only .iteritems()) works on both
    # Python 2 and Python 3.
    vertex_classes = {
        'atom': atom,
        'group': group,
        'text': textatom,
        'query': queryatom,
    }
    for name, cls in vertex_classes.items():
        for a in dom_extensions.simpleXPathSearch(package, name):
            self.insert_atom(cls(standard=std, package=a, molecule=self))

    self._id_map = [a.id for a in self.atoms]

    for b in dom_extensions.simpleXPathSearch(package, 'bond'):
        bnd = bond(standard=std, package=b, molecule=self)
        self.add_edge(bnd.atom1, bnd.atom2, bnd)

    # template related attributes
    temp = package.getElementsByTagName('template')
    if temp:
        temp = temp[0]
        self.t_atom = Store.id_manager.get_object_with_id(
            temp.getAttribute('atom'))
        if temp.getAttribute('bond_first') and temp.getAttribute(
                'bond_second'):
            self.t_bond_first = Store.id_manager.get_object_with_id(
                temp.getAttribute('bond_first'))
            self.t_bond_second = Store.id_manager.get_object_with_id(
                temp.getAttribute('bond_second'))
        self.next_to_t_atom = self.t_atom.neighbors[0]

    # display form: serialized children of the first <display-form>,
    # stored UTF-8 encoded
    df = package.getElementsByTagName('display-form')
    if df:
        df = df[0]
        self.display_form = ''.join(
            [e.toxml() for e in df.childNodes]).encode('utf-8')

    # fragments
    for fel in dom_extensions.simpleXPathSearch(package, "fragment"):
        f = fragment()
        try:
            f.read_package(fel)
        except bkchem_exceptions.bkchem_fragment_error:
            # deliberate best-effort: a fragment that fails to load is skipped
            pass
        else:
            self.fragments.add(f)

    ud = dom_extensions.getChildrenNamed(package, "user-data")
    if ud:
        self.user_data = [u.cloneNode(True) for u in ud]

    # final check of atoms valencies (plain loop: the call is made only for
    # its side effect, so no throw-away list is built)
    for a in self.vertices:
        if isinstance(a, atom):
            a.raise_valency_to_senseful_value()
def create_fragment(self, name, edges, vertices, type="explicit", strict=False):
    """Create a fragment from *edges* and *vertices* and register it.

    :param name: name given to the new fragment
    :param edges: iterable of edges; stored on the fragment as a set
    :param vertices: iterable of vertices; stored on the fragment as a set
    :param type: fragment type passed through to ``fragment``
    :param strict: when true, the fragment is only created if
        ``self.defines_connected_subgraph_e(edges)`` holds
    :return: the new fragment, or ``None`` when the strict check fails
    """
    # guard clause: in strict mode reject edge sets that fail the check
    if strict and not self.defines_connected_subgraph_e(edges):
        return None

    frag_id = Store.id_manager.generate_id("frag")
    new_fragment = fragment(frag_id, name=name, type=type)
    new_fragment.edges = set(edges)
    new_fragment.vertices = set(vertices)
    self.fragments.add(new_fragment)
    return new_fragment
def split_fragment_hets(frag, min_het_count=3, max_het_frac=0.25):
    """Split *frag* at the positions reported by ``get_het_breakpoints``.

    Alleles whose position is a breakpoint are discarded and end the
    current piece.  Alleles whose call is ``'M'`` are discarded as well.
    Pieces with fewer than two alleles are thrown away.

    :param frag: fragment exposing ``seq`` (tuples of
        ``(pos, gpos, call, qual)``), ``name``, ``first_piece`` and
        ``last_piece``
    :param min_het_count: forwarded to ``get_het_breakpoints``
    :param max_het_frac: forwarded to ``get_het_breakpoints``
    :return: list of new ``fragment.fragment`` objects (possibly empty).
        If at least one breakpoint was hit the pieces are named
        ``<name>:H<k>``; otherwise the single piece keeps the original
        name and first/last flags.
    """
    breakpos = get_het_breakpoints(frag, min_het_count, max_het_frac)

    pieces = []
    current = []
    was_split = False
    for pos, gpos, call, qual in frag.seq:
        if pos not in breakpos:
            if call != 'M':
                current.append((pos, gpos, call, qual))
            continue
        # breakpoint: close the running piece and start a fresh one
        was_split = True
        if len(current) >= 2:
            pieces.append(current)
        current = []
    # flush whatever is left after the last allele
    if len(current) >= 2:
        pieces.append(current)

    if not pieces:
        return []

    if not was_split:
        return [fragment.fragment(pieces[0], frag.name, frag.first_piece,
                                  frag.last_piece)]

    final_index = len(pieces) - 1
    piece_objects = []
    for i, piece in enumerate(pieces):
        new_name = '{}:H{}'.format(frag.name, i + 1)
        # only the outermost pieces can inherit the first/last flags
        is_first = bool(i == 0 and frag.first_piece)
        is_last = bool(i == final_index and frag.last_piece)
        piece_objects.append(
            fragment.fragment(piece, new_name, is_first, is_last))
    return piece_objects
def create_fragment(self, name, edges, vertices, type="explicit", strict=False):
    """Build a new fragment, add it to ``self.fragments`` and return it.

    :param name: fragment name
    :param edges: edges of the fragment (converted to a set)
    :param vertices: vertices of the fragment (converted to a set)
    :param type: fragment type handed to the ``fragment`` constructor
    :param strict: if true, *edges* must pass
        ``self.defines_connected_subgraph_e``; otherwise nothing is created
    :return: the created fragment or ``None``
    """
    # the connectivity check is only evaluated in strict mode
    allowed = (not strict) or self.defines_connected_subgraph_e(edges)
    if not allowed:
        return None

    created = fragment(
        Store.id_manager.generate_id("frag"), name=name, type=type)
    created.edges, created.vertices = set(edges), set(vertices)
    self.fragments.add(created)
    return created
def read_package(self, package):
    """reads the dom element package and sets internal state according to it

    In order, this reads: the ``name`` and (if present) ``id`` attributes,
    all ``atom``/``group``/``text``/``query`` children (inserted as
    vertices), all ``bond`` children (added as edges), the optional
    ``template`` and ``display-form`` elements, all ``fragment`` children
    and any ``user-data`` children.  Finally the valency of every vertex
    that is an ``atom`` instance is raised to a sensible value.

    :param package: DOM element describing the molecule
    """
    # the standard
    std = self.paper and self.paper.standard or None

    self.name = package.getAttribute('name')
    if package.getAttribute('id'):
        self.id = package.getAttribute('id')

    # element name -> class used to build the vertex
    vertex_classes = {
        'atom': atom,
        'group': group,
        'text': textatom,
        'query': queryatom,
    }
    for name, cls in vertex_classes.items():
        for a in dom_extensions.simpleXPathSearch(package, name):
            self.insert_atom(cls(standard=std, package=a, molecule=self))

    self._id_map = [a.id for a in self.atoms]

    for b in dom_extensions.simpleXPathSearch(package, 'bond'):
        bnd = bond(standard=std, package=b, molecule=self)
        self.add_edge(bnd.atom1, bnd.atom2, bnd)

    # template related attributes
    temp = package.getElementsByTagName('template')
    if temp:
        temp = temp[0]
        self.t_atom = Store.id_manager.get_object_with_id(
            temp.getAttribute('atom'))
        if temp.getAttribute('bond_first') and temp.getAttribute('bond_second'):
            self.t_bond_first = Store.id_manager.get_object_with_id(
                temp.getAttribute('bond_first'))
            self.t_bond_second = Store.id_manager.get_object_with_id(
                temp.getAttribute('bond_second'))
        self.next_to_t_atom = self.t_atom.neighbors[0]

    # display form
    df = package.getElementsByTagName('display-form')
    if df:
        df = df[0]
        # toxml('utf-8') returns bytes on Python 3, so the pieces must be
        # joined with a bytes separator; ''.join(...) raised TypeError as
        # soon as <display-form> had a child.  On Python 2 b'' is '' and the
        # result is unchanged.
        self.display_form = b''.join(
            [e.toxml('utf-8') for e in df.childNodes])

    # fragments
    for fel in dom_extensions.simpleXPathSearch(package, "fragment"):
        f = fragment()
        try:
            f.read_package(fel)
        except bkchem_exceptions.bkchem_fragment_error:
            # deliberate best-effort: a fragment that fails to load is skipped
            pass
        else:
            self.fragments.add(f)

    ud = dom_extensions.getChildrenNamed(package, "user-data")
    if ud:
        self.user_data = [u.cloneNode(True) for u in ud]

    # final check of atoms valencies (plain loop: the call is made only for
    # its side effect, so no throw-away list is built)
    for a in self.vertices:
        if isinstance(a, atom):
            a.raise_valency_to_senseful_value()
def filter_on_VCF(flist, vcf_filter):
    """Keep only alleles whose genomic position is listed in a VCF file.

    Lines starting with ``#`` and lines with fewer than five tab-separated
    fields are ignored.  For every remaining line the second column is
    parsed as an integer and decremented by one before being stored
    (presumably converting 1-based VCF coordinates to the positions kept in
    ``allele[1]`` -- confirm against the fragment producer).

    The whole VCF must refer to a single chromosome (first column).  If a
    second chromosome name is seen, an error is printed and the process
    exits with status 1.

    :param flist: list of fragment objects exposing ``seq``, ``name``,
        ``first_piece`` and ``last_piece``
    :param vcf_filter: path of the VCF file to read
    :return: list of newly built ``fragment.fragment`` objects; only
        fragments that keep at least two alleles are included
    :raises SystemExit: if the VCF mentions more than one chromosome
    """
    # local import keeps this function self-contained; sys.exit is used
    # because the bare ``exit`` helper is injected by ``site`` and is not
    # guaranteed to exist (e.g. under ``python -S`` or in frozen apps)
    import sys

    vcf_positions = set()
    file_chrom = None  # whole VCF file should have this chromosome.
    with open(vcf_filter, 'r') as infile:
        for line in infile:
            if line.startswith('#'):
                continue
            el = line.strip().split('\t')
            if len(el) < 5:
                continue

            vcf_chrom = el[0]
            if file_chrom is None:
                file_chrom = vcf_chrom
            elif vcf_chrom != file_chrom:
                print("ERROR, multi-chromosomal VCF.")
                sys.exit(1)

            vcf_positions.add(int(el[1]) - 1)

    filtered_flist = []
    for f in flist:
        # keep the alleles whose genomic position appears in the VCF
        new_seq = [a for a in f.seq if a[1] in vcf_positions]
        # a fragment needs at least 2 alleles after filtering to be useful
        if len(new_seq) >= 2:
            filtered_flist.append(
                fragment.fragment(new_seq, f.name, f.first_piece,
                                  f.last_piece))
    return filtered_flist
def reduce(array, op):
    """Perform a tree-like reduction over all axes of *array*.

    :param array: *pyDive.ndarray*, *pyDive.h5_ndarray* or *pyDive.cloned_ndarray* to be reduced
    :param numpy-ufunc op: reduce operation, e.g. *numpy.add*.
    :return: the reduced value (``None`` if a disk-backed array produced no chunks)

    If the hdf5 data exceeds the memory limit (currently 25% of the combined main memory of all
    cluster nodes) the data will be read block-wise so that a block fits into memory.
    """
    def reduce_wrapper(array_name, op_name):
        # executed on the engines: resolve the array and the ufunc by name
        array = globals()[array_name]
        # attribute lookup instead of eval(): same result for a ufunc name,
        # without evaluating an arbitrary string
        op = getattr(np, op_name)
        return algorithm.__tree_reduce(array, axis=None, op=op)  # reduction over all axes

    view = com.getView()
    tmp_targets = view.targets  # save current target list
    try:
        if type(array) == VirtualArrayOfStructs:
            view.targets = array.firstArray.target_ranks
        else:
            view.targets = array.target_ranks

        result = None

        if (hasattr(array, "arraytype") and array.arraytype in hdd_arraytypes) \
                or type(array) in hdd_arraytypes:
            # disk-backed array: reduce chunk by chunk, folding the partial results
            for chunk in fragment(array):
                array_name = repr(chunk)

                targets_results = view.apply(interactive(reduce_wrapper), array_name, op.__name__)
                chunk_result = op.reduce(targets_results)  # reduce over targets' results

                if result is None:
                    result = chunk_result
                else:
                    result = op(result, chunk_result)
        else:
            array_name = repr(array)

            targets_results = view.apply(interactive(reduce_wrapper), array_name, op.__name__)
            result = op.reduce(targets_results)  # reduce over targets' results
    finally:
        # restore target list even if the remote call or the reduction raised,
        # so a failure does not leave the shared view pointing at a subset
        view.targets = tmp_targets

    return result
def filter_het_positions(flist):
    """Strip alleles whose call is ``'M'`` and drop fragments left too short.

    The third element of every allele tuple in ``f.seq`` is the call that is
    compared against ``'M'``.

    :param flist: list of fragment objects exposing ``seq``, ``name``,
        ``first_piece`` and ``last_piece``
    :return: list of newly built ``fragment.fragment`` objects; only
        fragments that keep at least two alleles are included
    """
    kept = []
    for frag in flist:
        informative = [allele for allele in frag.seq if allele[2] != 'M']
        if len(informative) < 2:
            # fewer than two alleles left, so the fragment is dropped
            continue
        kept.append(
            fragment.fragment(informative, frag.name, frag.first_piece,
                              frag.last_piece))
    return kept
s = socket.socket(socket.AF_LORA, socket.SOCK_RAW) s.setsockopt(socket.SOL_LORA, socket.SO_DR, 5) s.setsockopt(socket.SOL_LORA, socket.SO_CONFIRMED, False) pycom.heartbeat(False) # create a message for the trigger. message = "Hello LoRa" fmt = ">%ds" % len(message) buf = struct.pack(fmt, message) l2_size = len(message) # it must be set in each sending message. s.send(buf) # fragment instance # XXX rule_id and dtag are zero for the ietf100 testing. fg = fragment.fragment(buf, 0, 0, window_size=1) dfg = fragment.defragment_factory() while True: pycom.rgbled(0xFF0000) s.setblocking(True) s.settimeout(10) # waiting somethign from the server try: rx_data = s.recv(64) print("received:", rx_data) # trying to defrag ret, buf = dfg.defrag(rx_data) if ret == _SCHC_DEFRAG_NOTYET:
def map(f, *arrays, **kwargs):
    """Applies *f* on :term:`engine` on local arrays related to *arrays*.
    Example: ::

        cluster_array = pyDive.ones(shape=[100], distaxes=0)

        cluster_array *= 2.0
        # equivalent to
        pyDive.map(lambda a: a *= 2.0, cluster_array) # a is the local numpy-array of *cluster_array*

    Or, as a decorator: ::

        @pyDive.map
        def twice(a):
            a *= 2.0

        twice(cluster_array)

    :param callable f: function to be called on :term:`engine`. Has to accept *numpy-arrays* and *kwargs*
    :param arrays: list of arrays including *pyDive.ndarrays*, *pyDive.h5_ndarrays* or *pyDive.cloned_ndarrays*
    :param kwargs: user-specified keyword arguments passed to *f*
    :return: ``None`` when arrays are given; in decorator mode (no arrays) a
        function that accepts the arrays and performs the mapping
    :raises AssertionError: if the *shapes* of *pyDive.ndarrays* and *pyDive.h5_ndarrays* do not match
    :raises AssertionError: if the *distaxes* attributes of *pyDive.ndarrays* and *pyDive.h5_ndarrays* do not match

    Notes:
        - If the hdf5 data exceeds the memory limit (currently 25% of the combined main memory of all
          cluster nodes) the data will be read block-wise so that a block fits into memory.
        - *map* chooses the list of *engines* from the **first** element of *arrays*. On these engines
          *f* is called. If the first array is a *pyDive.h5_ndarray* all engines will be used.
        - *map* is not writing data back to a *pyDive.h5_ndarray* yet.
        - *map* does not equalize the element distribution of *pyDive.ndarrays* before execution.
    """
    if not arrays:
        # decorator mode
        def map_deco(*arrays, **kwargs):
            map(f, *arrays, **kwargs)
        return map_deco

    def map_wrapper(f, array_names, **kwargs):
        # executed on the engines: resolve the local arrays by name
        arrays = [globals()[array_name] for array_name in array_names]
        f(*arrays, **kwargs)

    def is_cloned(a):
        return (hasattr(a, "arraytype") and a.arraytype is cloned_ndarray) \
            or type(a) is cloned_ndarray

    view = com.getView()
    tmp_targets = view.targets  # save current target list
    try:
        if type(arrays[0]) == VirtualArrayOfStructs:
            view.targets = arrays[0].firstArray.target_ranks
        else:
            view.targets = arrays[0].target_ranks

        hdd_arrays = [a for a in arrays
                      if (hasattr(a, "arraytype") and a.arraytype in hdd_arraytypes)
                      or type(a) in hdd_arraytypes]
        if hdd_arrays:
            cloned_arrays = [a for a in arrays if is_cloned(a)]
            other_arrays = [a for a in arrays if not is_cloned(a)]

            cloned_arrays_ids = [id(a) for a in cloned_arrays]
            other_arrays_ids = [id(a) for a in other_arrays]

            # NOTE(review): the loop variable ``fragments`` is never read; the
            # names sent to the engines are built from the original
            # ``other_arrays``.  This looks like it was meant to iterate over
            # ``fragments`` instead -- confirm against ``fragment``'s contract
            # before changing it.
            for fragments in fragment(*other_arrays):
                it_other_arrays = iter(other_arrays)
                it_cloned_arrays = iter(cloned_arrays)

                # rebuild the argument list in the caller's original order
                array_names = []
                for a in arrays:
                    if id(a) in cloned_arrays_ids:
                        # builtin next() works on Python 2.6+ and Python 3,
                        # unlike the Python-2-only iterator.next() method
                        array_names.append(repr(next(it_cloned_arrays)))
                        continue
                    if id(a) in other_arrays_ids:
                        array_names.append(repr(next(it_other_arrays)))
                        continue

                view.apply(interactive(map_wrapper), interactive(f), array_names, **kwargs)
        else:
            array_names = [repr(a) for a in arrays]
            view.apply(interactive(map_wrapper), interactive(f), array_names, **kwargs)
    finally:
        # restore target list even if *f* or the remote call raised
        view.targets = tmp_targets
def mapReduce(map_func, reduce_op, *arrays, **kwargs):
    """Applies *map_func* on :term:`engine` on local arrays related to *arrays*
    and reduces its result in a tree-like fashion over all axes.
    Example: ::

        cluster_array = pyDive.ones(shape=[100], distaxes=0)

        s = pyDive.mapReduce(lambda a: a**2, np.add, cluster_array) # a is the local numpy-array of *cluster_array*
        assert s == 100

    :param callable map_func: function to be called on :term:`engine`. Has to accept *numpy-arrays* and *kwargs*
    :param numpy-ufunc reduce_op: reduce operation, e.g. *numpy.add*.
    :param arrays: list of arrays including *pyDive.ndarrays*, *pyDive.h5_ndarrays* or *pyDive.cloned_ndarrays*
    :param kwargs: user-specified keyword arguments passed to *map_func*
    :return: the reduced value (``None`` if disk-backed arrays produced no fragments)
    :raises AssertionError: if the *shapes* of *pyDive.ndarrays* and *pyDive.h5_ndarrays* do not match
    :raises AssertionError: if the *distaxes* attributes of *pyDive.ndarrays* and *pyDive.h5_ndarrays* do not match

    Notes:
        - If the hdf5 data exceeds the memory limit (currently 25% of the combined main memory of all
          cluster nodes) the data will be read block-wise so that a block fits into memory.
        - *mapReduce* chooses the list of *engines* from the **first** element of *arrays*. On these
          engines the mapReduce will be executed. If the first array is a *pyDive.h5_ndarray* all
          engines will be used.
        - *mapReduce* is not writing data back to a *pyDive.h5_ndarray* yet.
        - *mapReduce* does not equalize the element distribution of *pyDive.ndarrays* before execution.
    """
    def mapReduce_wrapper(map_func, reduce_op_name, array_names, **kwargs):
        # executed on the engines: resolve the local arrays and the ufunc by name
        arrays = [globals()[array_name] for array_name in array_names]
        # attribute lookup instead of eval(): same result for a ufunc name,
        # without evaluating an arbitrary string
        reduce_op = getattr(np, reduce_op_name)
        return algorithm.__tree_reduce(map_func(*arrays, **kwargs), axis=None, op=reduce_op)

    def is_cloned(a):
        return (hasattr(a, "arraytype") and a.arraytype is cloned_ndarray) \
            or type(a) is cloned_ndarray

    view = com.getView()
    tmp_targets = view.targets  # save current target list
    try:
        if type(arrays[0]) == VirtualArrayOfStructs:
            view.targets = arrays[0].firstArray.target_ranks
        else:
            view.targets = arrays[0].target_ranks

        result = None

        hdd_arrays = [a for a in arrays
                      if (hasattr(a, "arraytype") and a.arraytype in hdd_arraytypes)
                      or type(a) in hdd_arraytypes]
        if hdd_arrays:
            cloned_arrays = [a for a in arrays if is_cloned(a)]
            other_arrays = [a for a in arrays if not is_cloned(a)]

            cloned_arrays_ids = [id(a) for a in cloned_arrays]
            other_arrays_ids = [id(a) for a in other_arrays]

            # NOTE(review): the loop variable ``fragments`` is never read; the
            # names sent to the engines are built from the original
            # ``other_arrays``.  This looks like it was meant to iterate over
            # ``fragments`` instead -- confirm against ``fragment``'s contract
            # before changing it.
            for fragments in fragment(*other_arrays):
                it_other_arrays = iter(other_arrays)
                it_cloned_arrays = iter(cloned_arrays)

                # rebuild the argument list in the caller's original order
                array_names = []
                for a in arrays:
                    if id(a) in cloned_arrays_ids:
                        # builtin next() works on Python 2.6+ and Python 3,
                        # unlike the Python-2-only iterator.next() method
                        array_names.append(repr(next(it_cloned_arrays)))
                        continue
                    if id(a) in other_arrays_ids:
                        array_names.append(repr(next(it_other_arrays)))
                        continue

                targets_results = view.apply(
                    interactive(mapReduce_wrapper),
                    interactive(map_func), reduce_op.__name__, array_names, **kwargs)

                fragment_result = reduce_op.reduce(targets_results)  # reduce over targets' results

                if result is None:
                    result = fragment_result
                else:
                    result = reduce_op(result, fragment_result)
        else:
            array_names = [repr(a) for a in arrays]
            targets_results = view.apply(
                interactive(mapReduce_wrapper),
                interactive(map_func), reduce_op.__name__, array_names, **kwargs)

            result = reduce_op.reduce(targets_results)  # reduce over targets' results
    finally:
        # restore target list even if *map_func* or the remote call raised
        view.targets = tmp_targets

    return result
def fragment_comparison_split(flist, threshold=3):
    """Split fragments at positions where they are inconsistent with others.

    The fragments are sorted by the first element of their first allele and
    every overlapping pair ``(i, j)`` with ``i < j`` is passed to
    ``consistent``, which maintains ``contam_bounds``: a mapping from index
    pairs to sets of break positions.  Each fragment is then cut at its
    collected break positions; an allele at a break position starts the next
    piece.  Pieces with fewer than two alleles are discarded.

    :param flist: list of fragment objects exposing ``seq``, ``name``,
        ``first_piece`` and ``last_piece``
    :param threshold: forwarded unchanged to ``consistent``
    :return: new list holding the untouched fragments (same objects) and,
        for split fragments, new ``fragment.fragment`` pieces named
        ``<name>:S<k>``
    """
    flist = sorted(flist, key=lambda x: x.seq[0][0])
    contam_bounds = defaultdict(set)
    N = len(flist)

    # pairwise comparison of all overlapping fragments
    for i in range(N):
        for j in range(i + 1, N):
            f1 = flist[i]
            f2 = flist[j]
            if not overlap(f1, f2):
                continue
            _, contam_bounds = consistent(f1, f2, i, j, contam_bounds,
                                          threshold)

    break_list = [set() for _ in range(N)]

    for i in range(N):
        # consider repairing fragment i.
        # if it is inconsistent with multiple reads then we just split it
        # where inconsistent.
        # if it is inconsistent with one read, then we split both reads at
        # the inconsistent location.
        blist = []
        for j in range(N):
            # membership test first so the defaultdict does not grow
            if (i, j) not in contam_bounds or contam_bounds[(i, j)] == set():
                continue
            blist.append(j)

        for b1, b2 in itertools.combinations(blist, 2):
            common_error = set.intersection(contam_bounds[(i, b1)],
                                            contam_bounds[(i, b2)])
            if common_error != set():
                break_list[i] = break_list[i].union(common_error)

                for bx in blist:
                    inter = set.intersection(common_error,
                                             contam_bounds[(i, bx)])
                    if inter != set():
                        # remove the whole contiguous run of positions
                        # around one shared position from this pair's
                        # bounds; the break is now recorded in break_list[i]
                        rightward = next(iter(inter))
                        leftward = rightward - 1
                        while rightward in contam_bounds[(i, bx)]:
                            contam_bounds[(i, bx)].remove(rightward)
                            rightward += 1
                        while leftward in contam_bounds[(i, bx)]:
                            contam_bounds[(i, bx)].remove(leftward)
                            leftward -= 1

    new_flist = []

    for i in range(N):
        f1 = flist[i]

        for j in range(N):
            if (i, j) in contam_bounds and contam_bounds[(i, j)] != set():
                # inconsistencies weren't previously fixed
                break_list[i] = set.union(break_list[i],
                                          contam_bounds[(i, j)])

        if break_list[i] == set():
            # add unedited fragment to new fragment list
            new_flist.append(f1)
            continue

        # chop up fragment where necessary and add pieces to fragment list
        new_flist_temp = []
        new_seq = []
        name_ctr = 1
        for allele in f1.seq:
            if allele[0] in break_list[i]:
                if VERBOSE:
                    print("Breaking {} at pos {}".format(f1.name, allele[0]))
                if len(new_seq) > 1:
                    new_name = "{}:S{}".format(f1.name, name_ctr)
                    new_flist_temp.append(
                        fragment.fragment(new_seq, new_name, False, False))
                    name_ctr += 1
                new_seq = []
            new_seq.append(allele)

        if len(new_seq) > 1:
            new_name = "{}:S{}".format(f1.name, name_ctr)
            new_flist_temp.append(
                fragment.fragment(new_seq, new_name, False, False))

        # Every piece may have been too short to keep.  Indexing [0] / [-1]
        # on the empty list used to raise IndexError in that case.
        if new_flist_temp:
            if f1.first_piece:
                new_flist_temp[0].first_piece = True
            if f1.last_piece:
                new_flist_temp[-1].last_piece = True
            new_flist += new_flist_temp

    return new_flist