def get_results(url):
    # Get response from url; retry until enough tables are rendered.
    tables = []
    while len(tables) < index_min:
        # print(len(tables), end=' ')
        driver = webdriver.Chrome()
        driver.get(url)
        time.sleep(3)
        # driver.implicitly_wait(3)
        soup = BeautifulSoup(driver.page_source, 'lxml')
        tables = soup.find_all('table')
        # Release the browser before any retry.
        driver.quit()
        # print(len(tables))

    # get race info per race
    info_panel = tables[1]
    info = list(map(lambda x: x.get_text(), info_panel.find_all('td')))
    race_info = {
        'tag': info[6],
        'name': info[9],
        'cond': info[7] + ' ' + info[8],
        'track': info[10] + ' ' + info[11]
    }

    # -------------------------
    # input and process results
    # -------------------------
    index = index_min
    table_results = make_list(tables[index_min])
    # try out the index of the results table
    while len(table_results) == 0 or len(
            table_results[0]) == 0 or table_results[0][0] != "名次":
        index = index + 1
        table_results = make_list(tables[index])
    # filter valid rows
    table_results = list(filter(lambda x: len(x) > 10, table_results))
    for i, row in enumerate(table_results):
        if i == 0:
            continue
        # join the section positions into 1 slot
        table_results[i] = row

    # -----------------------------
    # input and process award rates
    # -----------------------------
    index = index_min
    table_awards = make_list(tables[index_min])
    # try out the index of the awards table
    while len(table_awards) == 0 or len(
            table_awards[0]) == 0 or table_awards[0][0] != "派彩":
        index = index + 1
        table_awards = make_list(tables[index])
    # process the awards table
    table_awards = table_awards[1:]
    for i, row in reversed(list(enumerate(table_awards))):
        if i == 0:
            continue
        if util.is_even(len(row)):
            table_awards[i - 1] += row
    table_awards = list(
        map(lambda x: [x[0], list(zip(x[1::2], x[2::2]))],
            list(filter(lambda x: not util.is_even(len(x)), table_awards))))
    # print_table(table_awards)
    return race_info, table_results, table_awards
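# make_list is not defined in this section; in the scraping functions it
# evidently converts a BeautifulSoup <table> into a list of rows of cell
# text. A hypothetical minimal version (an assumption, not the original
# helper):
def make_list(table):
    """Convert a bs4 <table> tag into a list of rows of stripped cell text."""
    return [[cell.get_text(strip=True) for cell in tr.find_all(['td', 'th'])]
            for tr in table.find_all('tr')]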
def __init__(self, tree):
    self.name = tree.name
    self.type_parameters = make_list(self.build(tree.type_parameters))
    self.modifiers = tree.modifiers
    self.members = []
    self.pure_virtual = False
    if isinstance(tree, InterfaceDeclaration):
        self.pure_virtual = True

    # load members
    for member in tree.body:
        if isinstance(member, FieldDeclaration):
            if self.pure_virtual:
                self.error("FieldDeclaration in interface!")
                continue
            declarations = __init__.ASTVariable.ASTVariable.split_unique(member)
            for declaration in declarations:
                field = __init__.ASTVariable.ASTVariable(declaration)
                self.members.append(field)
        elif isinstance(member, (MethodDeclaration, ConstructorDeclaration)):
            method = __init__.ASTMethod.ASTMethod(member)
            if self.pure_virtual:
                method.pure_virtual = True
            self.members.append(method)
        elif isinstance(member, ClassDeclaration):
            class_handle = __init__.ASTClass.ASTClass(member)
            self.members.append(class_handle)
        elif isinstance(member, EmptyDeclaration):
            pass
        elif isinstance(member, InterfaceDeclaration):
            class_handle = ASTClass(member)
            self.members.append(class_handle)
        else:
            self.warn("Unexpected class member:", member)
            return

    # Check if a constructor exists
    if not self.pure_virtual:
        for member in tree.body:
            if isinstance(member, ConstructorDeclaration):
                break
        else:
            self.warn("No constructor found in the class '" + self.name +
                      "'! Please declare one!")

    self.parent_classes = []
    if hasattr(tree, "implements") and tree.implements:
        self.parent_classes += [self.build(i) for i in make_list(tree.implements)]
    if hasattr(tree, "extends") and tree.extends:
        self.parent_classes += [self.build(i) for i in make_list(tree.extends)]
def compute_adjoint_modes(self, mode_indices, modes, adjoint_vec_handles=None):
    """Computes adjoint modes, calls ``put`` on them.

    Args:
        ``mode_indices``: List of mode numbers to compute.
            Examples are ``range(10)`` or ``[3, 0, 6, 8]``.

        ``modes``: List of handles for adjoint modes.

    Kwargs:
        ``adjoint_vec_handles``: List of handles for adjoint vecs
            (:math:`Y`).
            Optional if already given when calling :py:meth:`compute_decomp`.
    """
    if adjoint_vec_handles is not None:
        self.adjoint_vec_handles = util.make_list(adjoint_vec_handles)
    if self.adjoint_vec_handles is None:
        raise util.UndefinedError('adjoint_vec_handles undefined')
    self.sing_vals = N.squeeze(N.array(self.sing_vals))
    build_coeff_mat = N.dot(self.L_sing_vecs, N.diag(self.sing_vals**-0.5))
    self.vec_space.lin_combine(modes, self.adjoint_vec_handles,
        build_coeff_mat, coeff_mat_col_indices=mode_indices)
def search(self, terms):
    data = dict()
    terms = make_list(terms)
    if self.gene_sym1 and self.gene_sym2:
        key = "Interaction %s <--> %s" % (self.gene_sym1, self.gene_sym2)
    else:
        key = "Interaction %s <--> %s" % (self.gene1, self.gene2)
    for term in terms:
        re_string = "\\b" + str(term) + "\\b"
        pattern = re.compile(re_string, re.I)
        for proc in self.processes:
            if pattern.search(proc[0]):
                data.setdefault(key + ' Processes', []).append(term)
                break
        for comp in self.components:
            if pattern.search(comp[0]):
                data.setdefault(key + ' Components', []).append(term)
                break
        for func in self.functions:
            if pattern.search(func[0]):
                data.setdefault(key + ' Functions', []).append(term)
                break
    return data
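# Illustrative standalone example (not from the original source): search()
# relies on \b word boundaries for whole-word matching. A safer variant
# escapes the term first, so regex metacharacters in gene names (e.g.
# "TNF(alpha)") cannot break the pattern.
import re

def match_whole_word(term, texts):
    pattern = re.compile(r"\b" + re.escape(str(term)) + r"\b", re.I)
    return [t for t in texts if pattern.search(t)]

# match_whole_word("p53", ["p53 binding", "TP53 pathway"]) -> ["p53 binding"]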
def compute_direct_modes(self, mode_indices, modes, direct_vec_handles=None):
    """Computes direct modes and calls ``put`` on them.

    Args:
        ``mode_indices``: List of mode indices, ``range(10)`` or
            ``[3, 0, 6]``.

        ``modes``: List of handles for direct modes.

    Kwargs:
        ``direct_vec_handles``: List of handles for direct vecs
            (:math:`X`).
            Optional if already given when calling :py:meth:`compute_decomp`.
    """
    if direct_vec_handles is not None:
        self.direct_vec_handles = util.make_list(direct_vec_handles)
    if self.direct_vec_handles is None:
        raise util.UndefinedError('direct_vec_handles undefined')
    self.sing_vals = N.squeeze(N.array(self.sing_vals))
    build_coeff_mat = N.dot(self.R_sing_vecs, N.diag(self.sing_vals**-0.5))
    self.vec_space.lin_combine(modes, self.direct_vec_handles,
        build_coeff_mat, coeff_mat_col_indices=mode_indices)
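# Sketch (assumed shapes, not the library's API): both mode routines above
# reduce to one dense linear combination. With direct vecs stacked as
# columns of X and adjoint vecs as columns of Y, the BPOD modes are
#   direct_modes  = X . R_sing_vecs . diag(sing_vals**-0.5)
#   adjoint_modes = Y . L_sing_vecs . diag(sing_vals**-0.5)
# which lin_combine evaluates column-by-column from the handles.
import numpy as np

def build_modes(vec_array, sing_vecs, sing_vals, mode_indices):
    # vec_array: (n, m) array whose columns are the vectors.
    sing_vals = np.asarray(sing_vals, dtype=float)
    build_coeff_mat = np.dot(sing_vecs, np.diag(sing_vals**-0.5))
    return np.dot(vec_array, build_coeff_mat[:, mode_indices])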
def __init__(self, tree):
    self.name = tree.name
    self.type_parameters = make_list(
        __init__.ASTBuilder.ASTBuilder.build(tree.type_parameters))
    self.modifiers = tree.modifiers
    self.members = []

    # load members
    for member in tree.body:
        if isinstance(member, FieldDeclaration):
            declarations = __init__.ASTVariable.ASTVariable.split_unique(member)
            for declaration in declarations:
                field = __init__.ASTVariable.ASTVariable(declaration)
                self.members.append(field)
        elif isinstance(member, (MethodDeclaration, ConstructorDeclaration)):
            method = __init__.ASTMethod.ASTMethod(member)
            self.members.append(method)
        else:
            print("Unexpected class member:", member)
def add(self, eqtl):
    eqtl = make_list(eqtl)
    for q in eqtl:
        if isinstance(q, eQTL):
            self.qtls.add(q)
            self.snp_ptrs.setdefault(q.snp, set()).add(q)
            self.gene_ptrs.setdefault(q.gene, set()).add(q)
def project(fdarray, line='bead', column=None, orth=False):
    """Project onto a subspace defined by line.

    Line is either a string, specifying a predefined line, or a dictionary
    with keys that are isotope labels and values that are ratios, e.g., if
    line = 'bead', sets

        line = {'Ce140': 88.45, 'Ce142': 11.11, 'Eu151': 47.8,
                'Eu153': 52.1, 'Ho165': 100., 'Lu175': 97.4, 'Lu176': 2.6}

    The projection uses only those markers that are available in the
    datasets.

    If column is not None, then a new column is appended to each flow
    dataset.

    If there is only one element in fdarray, then this returns the
    projection of each event.

    The predefined line values are:
        bead - 4 Isotope bead labels
        cell - Ir191/Ir193 stain
    """
    if line == 'bead':
        line = {
            'Ce140': 88.45,
            'Ce142': 11.11,
            'Eu151': 47.8,
            'Eu153': 52.1,
            'Ho165': 100.,
            'Lu175': 97.4,
            'Lu176': 2.6
        }
    if line == 'cell':
        line = {'Ir191': 37.3, 'Ir193': 62.7}

    fdarray = util.make_list(fdarray)
    for fd in fdarray:
        a = np.zeros(len(fd.isotopes))
        for j, isotope in enumerate(fd.isotopes):
            if isotope in line:
                a[j] = line[isotope]
        X = np.vstack([fd[ch] for ch in fd.isotopes])
        # Compute the orthogonal projector if asked.
        if not orth:
            Px = np.dot(a, X) / np.sqrt(np.dot(a, a))
        else:
            Px = np.sqrt(
                np.sum((X - np.outer(a, np.dot(a, X)) / np.dot(a, a))**2,
                       axis=0))
        if column is not None:
            fd[column] = Px
    if len(fdarray) == 1:
        return Px
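# Sketch of the math project() implements (names here are illustrative):
# the signal along the line a is the scalar projection a.x/|a|, and the
# orthogonal residual is |x - a (a.x)/(a.a)|.
import numpy as np

def line_projection(a, X, orth=False):
    # X: (channels, events); a: (channels,) line direction (unnormalized).
    if not orth:
        return np.dot(a, X) / np.sqrt(np.dot(a, a))
    return np.sqrt(np.sum((X - np.outer(a, np.dot(a, X)) / np.dot(a, a))**2,
                          axis=0))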
def add_gene_interactions(self, interactions):
    interactions = make_list(interactions)
    for inter in interactions:
        if isinstance(inter, GeneInteraction):
            (gene1, gene2) = inter.get_gene_pair()
            self.root[InteractionDB._GENE_ROOT].setdefault(
                gene1, {}).setdefault(gene2, inter)
            self.root[InteractionDB._GENE_ROOT].setdefault(
                gene2, {}).setdefault(gene1, inter)
            self.gene_interactions.append(inter)
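# Standalone sketch (illustrative names): the nested setdefault above
# builds a symmetric adjacency map, so an interaction can be looked up
# from either gene: adjacency[g1][g2] is adjacency[g2][g1].
adjacency = {}

def add_edge(adjacency, g1, g2, payload):
    adjacency.setdefault(g1, {}).setdefault(g2, payload)
    adjacency.setdefault(g2, {}).setdefault(g1, payload)

add_edge(adjacency, 'TP53', 'MDM2', 'binds')
assert adjacency['TP53']['MDM2'] == adjacency['MDM2']['TP53']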
def get_age(url, horse_id):
    # Get response from url; retry until enough tables are rendered.
    tables = []
    while len(tables) < INDEX_MIN:
        # print(len(tables), end=' ')
        driver = webdriver.Chrome()
        driver.get(url + horse_id)
        time.sleep(3)
        # driver.implicitly_wait(3)
        soup = BeautifulSoup(driver.page_source, 'lxml')
        tables = soup.find_all('table')
        driver.quit()
    if len(tables) <= 4:
        return "-"
    if make_list(tables[4])[0][0] != "出生地 / 馬齡":
        return "-"
    age = make_list(tables[4])[0][2].split("/")[1].lstrip(' ')
    # print(horse_id, age)
    return age
def get_racecard(url):
    # Get response from url; retry until enough tables are rendered.
    tables = []
    while len(tables) < index_min:
        # print(len(tables), end=' ')
        driver = webdriver.Chrome()
        driver.get(url)
        time.sleep(3)
        # driver.implicitly_wait(3)
        soup = BeautifulSoup(driver.page_source, 'lxml')
        tables = soup.find_all('table')
        # Release the browser before any retry.
        driver.quit()
        # print(len(tables))

    # input and process racecard
    table_racecard = make_list(tables[8])
    return table_racecard
def compute_modes(self, mode_indices, modes, vec_handles=None):
    """Computes the modes and calls ``put`` on the mode handles.

    Args:
        ``mode_indices``: List of mode numbers, e.g. ``range(10)`` or
            ``[3, 0, 5]``.

        ``modes``: List of handles for modes.

    Kwargs:
        ``vec_handles``: List of handles for vectors.
            Optional if given when calling ``compute_decomp``.
    """
    if vec_handles is not None:
        self.vec_handles = util.make_list(vec_handles)
    build_coeff_mat = self._compute_build_coeff_mat()
    self.vec_space.lin_combine(modes, self.vec_handles, build_coeff_mat,
        coeff_mat_col_indices=mode_indices)
def compute_modes(self, mode_indices, mode_handles, vec_handles=None):
    """Computes modes and calls ``put`` on them.

    Args:
        ``mode_indices``: List of mode indices, ``range(5)`` or
            ``[3, 0, 5]``.

        ``mode_handles``: List of handles for modes.

    Kwargs:
        ``vec_handles``: List of handles for vecs, can omit if given in
            :py:meth:`compute_decomp`.
    """
    if self.build_coeffs is None:
        raise util.UndefinedError('self.build_coeffs is undefined.')
    if vec_handles is not None:
        self.vec_handles = util.make_list(vec_handles)

    # For sequential data, the user provides a list of vec_handles whose
    # length is one greater than the number of rows of the build_coeffs
    # matrix. This is expected, as vec_handles is essentially partitioned
    # into two overlapping sets of handles, each one element shorter than
    # vec_handles.
    if len(self.vec_handles) - self.build_coeffs.shape[0] == 1:
        self.vec_space.lin_combine(mode_handles, self.vec_handles[:-1],
            self.build_coeffs, coeff_mat_col_indices=mode_indices)
    # For a non-sequential dataset, the user provides a list of
    # vec_handles whose length is equal to the number of rows in the
    # build_coeffs matrix.
    elif len(self.vec_handles) == self.build_coeffs.shape[0]:
        self.vec_space.lin_combine(mode_handles, self.vec_handles,
            self.build_coeffs, coeff_mat_col_indices=mode_indices)
    # Otherwise, raise an error, as the number of handles should fit one
    # of the two cases described above.
    else:
        raise ValueError(('Number of vec_handles does not match number of '
            'rows in build_coeffs matrix.'))
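# Illustration of the two accepted handle counts (assumed sizes): for a
# sequential time series of m+1 snapshots, build_coeffs has m rows, so
# modes are built from the first m handles; a non-sequential set supplies
# exactly m handles.
import numpy as np

num_modes, m = 3, 10
build_coeffs = np.random.rand(m, num_modes)
sequential_handles = list(range(m + 1))     # uses sequential_handles[:-1]
non_sequential_handles = list(range(m))     # used as-is
assert len(sequential_handles) - build_coeffs.shape[0] == 1
assert len(non_sequential_handles) == build_coeffs.shape[0]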
def lin_combine(self, sum_vec_handles, basis_vec_handles, coeff_mat,
    coeff_mat_col_indices=None):
    """Linearly combines the basis vecs and calls ``put`` on result.

    Args:
        ``sum_vec_handles``: List of handles for the sum vectors.

        ``basis_vec_handles``: List of handles for the basis vecs.

        ``coeff_mat``: Matrix with rows corresponding to basis vecs and
            columns to sum (lin. comb.) vecs.
            The rows and columns correspond, by index, to the lists
            basis_vec_handles and sum_vec_handles.
            ``sums = basis * coeff_mat``

    Kwargs:
        ``coeff_mat_col_indices``: List of column indices.
            The sum_vecs corresponding to these col indices are computed.

    Each processor retrieves a subset of the basis vecs to compute as many
    outputs as a processor can have in memory at once. Each processor
    computes the "layers" from the basis it is responsible for, and for as
    many modes as it can fit in memory. The layers from all procs are
    summed together to form the sum_vecs, and ``put`` is called on each.

    Scaling is:

      num gets/worker = :math:`n_s/(n_p*(max-2)) * n_b/n_p`

      passes/worker = :math:`(n_p-1) * n_s/(n_p*(max-2)) * (n_b/n_p)`

      scalar multiplies/worker = :math:`n_s*n_b/n_p`

    Where :math:`n_s` is number of sum vecs, :math:`n_b` is number of
    basis vecs, :math:`n_p` is number of processors,
    :math:`max` = ``max_vecs_per_node``.
    """
    sum_vec_handles = util.make_list(sum_vec_handles)
    basis_vec_handles = util.make_list(basis_vec_handles)
    num_bases = len(basis_vec_handles)
    num_sums = len(sum_vec_handles)
    if coeff_mat_col_indices is not None:
        coeff_mat = coeff_mat[:, coeff_mat_col_indices]
    if num_bases != coeff_mat.shape[0]:
        raise ValueError(('Number of coeff_mat rows (%d) does not equal '
            'number of basis handles (%d)')%(coeff_mat.shape[0], num_bases))
    if num_sums != coeff_mat.shape[1]:
        raise ValueError(('Number of coeff_mat cols (%d) does not equal '
            'number of output handles (%d)')%(coeff_mat.shape[1], num_sums))

    # Estimate the time this will take.
    # Burn the first get and add/scale; they sometimes include slow imports.
    test_vec_burn = basis_vec_handles[0].get()
    test_vec_burn_3 = test_vec_burn + 2.*test_vec_burn
    del test_vec_burn, test_vec_burn_3
    start_time = T.time()
    test_vec = basis_vec_handles[0].get()
    get_time = T.time() - start_time
    start_time = T.time()
    test_vec_3 = test_vec + 2.*test_vec
    add_scale_time = T.time() - start_time
    del test_vec, test_vec_3

    vecs_per_worker = self.max_vecs_per_node * _parallel.get_num_nodes() / \
        _parallel.get_num_MPI_workers()
    num_gets = num_sums/(_parallel.get_num_MPI_workers()*(\
        vecs_per_worker-2)) + \
        num_bases/_parallel.get_num_MPI_workers()
    num_add_scales = num_sums*num_bases/_parallel.get_num_MPI_workers()
    self.print_msg('Linear combinations will take at least %.1f minutes'%
        (num_gets*get_time/60. + num_add_scales*add_scale_time/60.))

    # convenience
    rank = _parallel.get_rank()

    # num_bases_per_proc_chunk is the num of bases each proc gets at once.
    num_bases_per_proc_chunk = 1
    num_sums_per_proc_chunk = self.max_vecs_per_proc - \
        num_bases_per_proc_chunk

    basis_tasks = _parallel.find_assignments(range(num_bases))
    sum_tasks = _parallel.find_assignments(range(num_sums))

    # Find max number of tasks among all processors
    max_num_basis_tasks = max([len(tasks) for tasks in basis_tasks])
    max_num_sum_tasks = max([len(tasks) for tasks in sum_tasks])

    # These variables are the number of iters through loops that retrieve
    # ("get") and "put" basis and sum vecs.
    num_basis_get_iters = int(N.ceil(
        max_num_basis_tasks*1./num_bases_per_proc_chunk))
    num_sum_put_iters = int(N.ceil(
        max_num_sum_tasks*1./num_sums_per_proc_chunk))
    if num_sum_put_iters > 1:
        self.print_msg('Warning: The basis vecs, '
            'of which there are %d, will be retrieved %d times each. '
            'If possible, increase number of nodes or '
            'max_vecs_per_node to reduce redundant retrieves and get a '
            'big speedup.'%(num_bases, num_sum_put_iters))

    for sum_put_index in xrange(num_sum_put_iters):
        if len(sum_tasks[rank]) > 0:
            start_sum_index = min(sum_tasks[rank][0] +
                sum_put_index*num_sums_per_proc_chunk,
                sum_tasks[rank][-1]+1)
            end_sum_index = min(start_sum_index+num_sums_per_proc_chunk,
                sum_tasks[rank][-1]+1)
            # Create empty list on each processor
            sum_layers = [None]*(end_sum_index - start_sum_index)
        else:
            start_sum_index = 0
            end_sum_index = 0
            sum_layers = []

        for basis_get_index in xrange(num_basis_get_iters):
            if len(basis_tasks[rank]) > 0:
                start_basis_index = min(basis_tasks[rank][0] +
                    basis_get_index*num_bases_per_proc_chunk,
                    basis_tasks[rank][-1]+1)
                end_basis_index = min(start_basis_index +
                    num_bases_per_proc_chunk, basis_tasks[rank][-1]+1)
                basis_indices = range(start_basis_index, end_basis_index)
            else:
                basis_indices = []

            # Pass the basis vecs to proc with rank -> mod(rank+1, num_procs).
            # Must do this for each processor, until data makes a circle.
            basis_vecs_recv = (None, None)
            for pass_index in xrange(_parallel.get_num_procs()):
                # If on the first pass, retrieve the basis vecs,
                # no send/recv.
                # This is all that is called when in serial; the loop
                # iterates once.
                if pass_index == 0:
                    if len(basis_indices) > 0:
                        basis_vecs = [basis_handle.get() \
                            for basis_handle in basis_vec_handles[
                                basis_indices[0]:basis_indices[-1]+1]]
                    else:
                        basis_vecs = []
                else:
                    # Figure out with whom to communicate
                    source = (_parallel.get_rank()-1) % \
                        _parallel.get_num_procs()
                    dest = (_parallel.get_rank()+1) % \
                        _parallel.get_num_procs()

                    # Create unique tags based on ranks
                    send_tag = _parallel.get_rank() * \
                        (_parallel.get_num_procs()+1) + dest
                    recv_tag = source*(_parallel.get_num_procs()+1) + \
                        _parallel.get_rank()

                    # Send/receive data
                    basis_vecs_send = (basis_vecs, basis_indices)
                    request = _parallel.comm.isend(basis_vecs_send,
                        dest=dest, tag=send_tag)
                    basis_vecs_recv = _parallel.comm.recv(
                        source=source, tag=recv_tag)
                    request.Wait()
                    _parallel.barrier()
                    basis_indices = basis_vecs_recv[1]
                    basis_vecs = basis_vecs_recv[0]

                # Compute the scalar multiplications for this set of data.
                # basis_indices stores the indices of the coeff_mat to use.
                for sum_index in xrange(start_sum_index, end_sum_index):
                    for basis_index, basis_vec in enumerate(basis_vecs):
                        sum_layer = basis_vec * \
                            coeff_mat[basis_indices[basis_index],\
                            sum_index]
                        if sum_layers[sum_index-start_sum_index] is None:
                            sum_layers[sum_index-start_sum_index] = \
                                sum_layer
                        else:
                            sum_layers[sum_index-start_sum_index] += \
                                sum_layer
                    if (T.time() - self.prev_print_time) > \
                        self.print_interval:
                        self.print_msg(
                            'Completed %.1f%% of linear combinations' %
                            (sum_index*100./len(sum_tasks[rank])))
                        self.prev_print_time = T.time()

        # Completed this set of sum vecs; put them to memory or file.
        for sum_index in xrange(start_sum_index, end_sum_index):
            sum_vec_handles[sum_index].put(
                sum_layers[sum_index-start_sum_index])
        del sum_layers

    self.print_msg('Completed %.1f%% of linear combinations' % 100.)
    self.prev_print_time = T.time()
    _parallel.barrier()
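# Serial sketch of what lin_combine computes (illustrative names): with
# basis vecs as columns of B, the sum vecs are the columns of B * C,
# optionally restricted to selected columns of the coefficient matrix.
import numpy as np

def lin_combine_serial(basis, coeff_mat, col_indices=None):
    if col_indices is not None:
        coeff_mat = coeff_mat[:, col_indices]
    return np.dot(basis, coeff_mat)   # each column is one "sum vec"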
def compute_symmetric_inner_product_mat(self, vec_handles):
    """Computes an upper-triangular symmetric matrix of inner products.

    Args:
        ``vec_handles``: List of vector handles.

    Returns:
        ``IP_mat``: Numpy array of inner products.

    See the documentation for :py:meth:`compute_inner_product_mat` for an
    idea how this works.

    TODO: JON, write detailed documentation similar to
    :py:meth:`compute_inner_product_mat`.
    """
    self._check_inner_product()
    vec_handles = util.make_list(vec_handles)

    num_vecs = len(vec_handles)

    # num_cols_per_proc_chunk is the number of cols each proc gets at once.
    # Columns are retrieved if the matrix must be broken up into sets of
    # chunks. Then symmetric upper triangular portions will be computed,
    # followed by a rectangular piece that uses columns not already in
    # memory.
    num_cols_per_proc_chunk = 1
    num_rows_per_proc_chunk = self.max_vecs_per_proc -\
        num_cols_per_proc_chunk

    # <nprocs> chunks are computed simultaneously, making up a set.
    num_cols_per_chunk = num_cols_per_proc_chunk * _parallel.get_num_procs()
    num_rows_per_chunk = num_rows_per_proc_chunk * _parallel.get_num_procs()

    # <num_row_chunks> is the number of sets that must be computed.
    num_row_chunks = int(N.ceil(num_vecs * 1. / num_rows_per_chunk))
    if num_row_chunks > 1:
        self.print_msg('Warning: The vecs, of which '
            'there are %d, will be retrieved %d times each. Increase '
            'number of nodes or max_vecs_per_node to reduce redundant '
            '"get"s for a speedup.'%(num_vecs, num_row_chunks))

    # Estimate the time this will take and determine matrix datatype
    # (real or complex).
    test_vec = vec_handles[0].get()
    # Burn the first; it sometimes contains slow imports.
    IP_burn = self.inner_product(test_vec, test_vec)
    start_time = T.time()
    test_vec = vec_handles[0].get()
    get_time = T.time() - start_time
    start_time = T.time()
    IP = self.inner_product(test_vec, test_vec)
    IP_time = T.time() - start_time
    IP_type = type(IP)

    total_IP_time = (num_vecs**2 * IP_time / 2. /
        _parallel.get_num_procs())
    vecs_per_proc = self.max_vecs_per_node * _parallel.get_num_nodes() / \
        _parallel.get_num_procs()
    num_gets = (num_vecs**2 /2.) / ((vecs_per_proc-2) *
        _parallel.get_num_procs()**2) + \
        num_vecs/_parallel.get_num_procs()/2.
    total_get_time = num_gets * get_time
    self.print_msg('Computing the inner product matrix will take at least '
        '%.1f minutes' % ((total_IP_time + total_get_time) / 60.))
    del test_vec

    # Use the same trick as in compute_IP_mat, having each proc fill in
    # elements of a num_rows x num_rows sized matrix, rather than
    # assembling small chunks. This is done for the triangular portions.
    # For the rectangular portions, the inner product mat is filled in
    # directly.
    IP_mat = N.mat(N.zeros((num_vecs, num_vecs), dtype=IP_type))
    for start_row_index in xrange(0, num_vecs, num_rows_per_chunk):
        end_row_index = min(num_vecs, start_row_index + num_rows_per_chunk)
        proc_row_tasks_all = _parallel.find_assignments(range(
            start_row_index, end_row_index))
        num_active_procs = len([task for task in \
            proc_row_tasks_all if task != []])
        proc_row_tasks = proc_row_tasks_all[_parallel.get_rank()]
        if len(proc_row_tasks) != 0:
            row_vecs = [vec_handle.get() for vec_handle in vec_handles[
                proc_row_tasks[0]:proc_row_tasks[-1] + 1]]
        else:
            row_vecs = []

        # Triangular chunks
        if len(proc_row_tasks) > 0:
            # Test that indices are consecutive
            if proc_row_tasks[0:] != range(proc_row_tasks[0],
                proc_row_tasks[-1] + 1):
                raise ValueError('Indices are not consecutive.')

            # Per-processor triangles (using only vecs in memory)
            for row_index in xrange(proc_row_tasks[0],
                proc_row_tasks[-1] + 1):
                # Diagonal term
                IP_mat[row_index, row_index] = self.\
                    inner_product(row_vecs[row_index - proc_row_tasks[
                    0]], row_vecs[row_index - proc_row_tasks[0]])
                # Off-diagonal terms
                for col_index in xrange(row_index + 1,
                    proc_row_tasks[-1] + 1):
                    IP_mat[row_index, col_index] = self.\
                        inner_product(row_vecs[row_index -\
                        proc_row_tasks[0]], row_vecs[col_index -\
                        proc_row_tasks[0]])

        # Number of square chunks to fill in is n * (n-1) / 2. At each
        # iteration we fill in n of them, so we need (n-1) / 2
        # iterations (round up).
        for set_index in xrange(int(N.ceil((num_active_procs - 1.) / 2))):
            # The current proc is the "sender"
            my_rank = _parallel.get_rank()
            my_row_indices = proc_row_tasks
            my_num_rows = len(my_row_indices)

            # The proc to send to is the "destination"
            dest_rank = (my_rank + set_index + 1) % num_active_procs

            # The proc that data is received from is the "source"
            source_rank = (my_rank - set_index - 1) % num_active_procs

            # Find the maximum number of sends/recvs to be done by any proc
            max_num_to_send = int(N.ceil(1. * max([len(tasks) for \
                tasks in proc_row_tasks_all]) /\
                num_cols_per_proc_chunk))

            for send_index in xrange(max_num_to_send):
                # Only processors responsible for rows communicate
                if my_num_rows > 0:
                    # Send row vecs, in groups of num_cols_per_proc_chunk.
                    # These become columns in the ensuing computation.
                    start_col_index = send_index * num_cols_per_proc_chunk
                    end_col_index = min(start_col_index +
                        num_cols_per_proc_chunk, my_num_rows)
                    col_vecs_send = (
                        row_vecs[start_col_index:end_col_index],
                        my_row_indices[start_col_index:end_col_index])

                    # Create unique tags based on ranks
                    send_tag = my_rank * (
                        _parallel.get_num_procs() + 1) + dest_rank
                    recv_tag = source_rank * (
                        _parallel.get_num_procs() + 1) + my_rank

                    # Send and receive data. The Wait() command after the
                    # receive prevents a race condition not fixed by
                    # sync(). The Wait() is very important for the non-
                    # blocking send (though we are unsure why).
                    request = _parallel.comm.isend(col_vecs_send,
                        dest=dest_rank, tag=send_tag)
                    col_vecs_recv = _parallel.comm.recv(source=source_rank,
                        tag=recv_tag)
                    request.Wait()
                    col_vecs = col_vecs_recv[0]
                    my_col_indices = col_vecs_recv[1]

                    for row_index in xrange(my_row_indices[0],
                        my_row_indices[-1] + 1):
                        for col_vec_index, col_vec in enumerate(col_vecs):
                            IP_mat[row_index, my_col_indices[
                                col_vec_index]] = self.inner_product(
                                row_vecs[row_index - my_row_indices[0]],
                                col_vec)
                    if (T.time() - self.prev_print_time) > \
                        self.print_interval:
                        num_completed_IPs = (N.abs(IP_mat)>0).sum()
                        percent_completed_IPs = \
                            (100.*2*num_completed_IPs * \
                            _parallel.get_num_MPI_workers())/\
                            (num_vecs**2)
                        self.print_msg(('Completed %.1f%% of inner ' +
                            'products')%percent_completed_IPs, sys.stderr)
                        self.prev_print_time = T.time()

                # Sync after send/receive
                _parallel.barrier()

        # Fill in the rectangular portion next to each triangle (if nec.).
        # Start at index after last row, continue to last column. This part
        # of the code is the same as in compute_IP_mat, as of revision 141.
        for start_col_index in xrange(end_row_index, num_vecs,
            num_cols_per_chunk):
            end_col_index = min(start_col_index + num_cols_per_chunk,
                num_vecs)
            proc_col_tasks = _parallel.find_assignments(range(
                start_col_index, end_col_index))[_parallel.get_rank()]

            # Pass the col vecs to proc with rank -> mod(rank+1, num_procs).
            # Must do this for each processor, until data makes a circle.
            col_vecs_recv = (None, None)
            if len(proc_col_tasks) > 0:
                col_indices = range(proc_col_tasks[0],
                    proc_col_tasks[-1]+1)
            else:
                col_indices = []

            for num_passes in xrange(_parallel.get_num_procs()):
                # If on the first pass, get the col vecs, no send/recv.
                # This is all that is called when in serial; the loop
                # iterates once.
                if num_passes == 0:
                    if len(col_indices) > 0:
                        col_vecs = [col_handle.get() \
                            for col_handle in vec_handles[col_indices[0]:\
                                col_indices[-1] + 1]]
                    else:
                        col_vecs = []
                else:
                    # Determine whom to communicate with
                    dest = (_parallel.get_rank() + 1) % _parallel.\
                        get_num_procs()
                    source = (_parallel.get_rank() - 1) % _parallel.\
                        get_num_procs()

                    # Create unique tag based on ranks
                    send_tag = _parallel.get_rank() * (_parallel.\
                        get_num_procs() + 1) + dest
                    recv_tag = source*(_parallel.get_num_procs() + 1) +\
                        _parallel.get_rank()

                    # Collect data and send/receive
                    col_vecs_send = (col_vecs, col_indices)
                    request = _parallel.comm.isend(col_vecs_send,
                        dest=dest, tag=send_tag)
                    col_vecs_recv = _parallel.comm.recv(source=source,
                        tag=recv_tag)
                    request.Wait()
                    _parallel.barrier()
                    col_indices = col_vecs_recv[1]
                    col_vecs = col_vecs_recv[0]

                # Compute the IPs for this set of data. col_indices stores
                # the indices of the IP_mat columns to be filled in.
                if len(proc_row_tasks) > 0:
                    for row_index in xrange(proc_row_tasks[0],
                        proc_row_tasks[-1]+1):
                        for col_vec_index, col_vec in enumerate(col_vecs):
                            IP_mat[row_index, col_indices[
                                col_vec_index]] = self.inner_product(
                                row_vecs[row_index - proc_row_tasks[0]],
                                col_vec)
                    if (T.time() - self.prev_print_time) > \
                        self.print_interval:
                        num_completed_IPs = (N.abs(IP_mat)>0).sum()
                        percent_completed_IPs = (100.*2*num_completed_IPs *
                            _parallel.get_num_MPI_workers())/(num_vecs**2)
                        self.print_msg(('Completed %.1f%% of inner ' +
                            'products')%percent_completed_IPs, sys.stderr)
                        self.prev_print_time = T.time()

        # Completed a chunk of rows and all columns on all processors.
        # Finished row_vecs loop; delete memory used.
        del row_vecs

    # Assemble the triangular portion chunks into IP_mat.
    if _parallel.is_distributed():
        IP_mat = _parallel.custom_comm.allreduce(IP_mat)

    # Create a mask for the repeated values. Select values that are zero
    # in the upper triangular portion (not computed there) but nonzero in
    # the lower triangular portion (computed there). For the case where
    # the inner product is not perfectly symmetric, this will select the
    # computation done in the upper triangular portion.
    mask = N.multiply(IP_mat == 0, IP_mat.T != 0)

    # Collect values below diagonal
    IP_mat += N.multiply(N.triu(IP_mat.T, 1), mask)

    # Symmetrize matrix
    IP_mat = N.triu(IP_mat) + N.triu(IP_mat, 1).T

    percent_completed_IPs = 100.
    self.print_msg(('Completed %.1f%% of inner ' +
        'products')%percent_completed_IPs, sys.stderr)
    self.prev_print_time = T.time()

    _parallel.barrier()
    return IP_mat
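# Serial sketch of the symmetric-Gram trick above (illustrative): compute
# only the upper triangle of G[i, j] = <v_i, v_j>, then mirror it below
# the diagonal, exactly as the triu calls at the end of the method do.
import numpy as np

def symmetric_gram(vecs, inner_product=np.dot):
    n = len(vecs)
    G = np.zeros((n, n))
    for i in range(n):
        for j in range(i, n):               # upper triangle only
            G[i, j] = inner_product(vecs[i], vecs[j])
    return np.triu(G) + np.triu(G, 1).T     # mirror below the diagonal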
def compute_inner_product_mat(self, row_vec_handles, col_vec_handles):
    """Computes the matrix of inner product combinations between vectors.

    Args:
        ``row_vec_handles``: List of row vector handles.
            For example BPOD adjoints, :math:`Y`.

        ``col_vec_handles``: List of column vector handles.
            For example BPOD directs, :math:`X`.

    Returns:
        ``IP_mat``: 2D array of inner products.

    The vecs are retrieved in memory-efficient chunks and are not all in
    memory at once.
    The row vecs and col vecs are assumed to be different.
    When they are the same, use :py:meth:`compute_symmetric_inner_product`
    for a 2x speedup.

    Each MPI worker (processor) is responsible for retrieving a subset of
    the rows and columns. The processors then send/recv columns via MPI so
    they can be used to compute all IPs for the rows on each MPI worker.
    This is repeated until all MPI workers are done with all of their row
    chunks. If there are 2 processors::

               | x o |
        rank0  | x o |
               | x o |
          -
               | o x |
        rank1  | o x |
               | o x |

    In the next step, rank 0 sends column 0 to rank 1 and rank 1 sends
    column 1 to rank 0. The remaining IPs are filled in::

               | x x |
        rank0  | x x |
               | x x |
          -
               | x x |
        rank1  | x x |
               | x x |

    When the number of cols and rows is not divisible by the number of
    processors, the processors are assigned unequal numbers of tasks.
    However, all processors are always part of the passing cycle.

    The scaling is:

    - num gets / processor ~ :math:`(n_r*n_c/((max-2)*n_p*n_p)) + n_r/n_p`
    - num MPI sends / processor ~
      :math:`(n_p-1)*(n_r/((max-2)*n_p))*n_c/n_p`
    - num inner products / processor ~ :math:`n_r*n_c/n_p`

    where :math:`n_r` is number of rows, :math:`n_c` number of columns,
    :math:`max` is ``max_vecs_per_proc =
    max_vecs_per_node/num_procs_per_node``, and :math:`n_p` is the number
    of MPI workers (processors).

    If there are more rows than columns, then an internal transpose and
    un-transpose is performed to improve efficiency (since :math:`n_c`
    only appears in the scaling in the quadratic term).
    """
    self._check_inner_product()
    row_vec_handles = util.make_list(row_vec_handles)
    col_vec_handles = util.make_list(col_vec_handles)

    num_cols = len(col_vec_handles)
    num_rows = len(row_vec_handles)

    if num_rows > num_cols:
        transpose = True
        temp = row_vec_handles
        row_vec_handles = col_vec_handles
        col_vec_handles = temp
        temp = num_rows
        num_rows = num_cols
        num_cols = temp
    else:
        transpose = False

    # convenience
    rank = _parallel.get_rank()

    # num_cols_per_proc_chunk is the number of cols each proc gets at once.
    num_cols_per_proc_chunk = 1
    num_rows_per_proc_chunk = self.max_vecs_per_proc - \
        num_cols_per_proc_chunk

    # Determine how the retrieving and inner products will be split up.
    row_tasks = _parallel.find_assignments(range(num_rows))
    col_tasks = _parallel.find_assignments(range(num_cols))

    # Find max number of row/col tasks among all processors
    max_num_row_tasks = max([len(tasks) for tasks in row_tasks])
    max_num_col_tasks = max([len(tasks) for tasks in col_tasks])

    # These variables are the number of iters through loops that retrieve
    # ("get") row and column vecs.
    num_row_get_loops = \
        int(N.ceil(max_num_row_tasks*1./num_rows_per_proc_chunk))
    num_col_get_loops = \
        int(N.ceil(max_num_col_tasks*1./num_cols_per_proc_chunk))
    if num_row_get_loops > 1:
        self.print_msg('Warning: The column vecs, of which '
            'there are %d, will be retrieved %d times each. Increase '
            'number of nodes or max_vecs_per_node to reduce redundant '
            '"get"s for a speedup.'%(num_cols, num_row_get_loops))

    # Estimate the time this will take and determine matrix datatype
    # (real or complex).
    row_vec = row_vec_handles[0].get()
    col_vec = col_vec_handles[0].get()
    # Burn the first; it sometimes contains slow imports.
    IP_burn = self.inner_product(row_vec, col_vec)
    start_time = T.time()
    row_vec = row_vec_handles[0].get()
    get_time = T.time() - start_time
    start_time = T.time()
    IP = self.inner_product(row_vec, col_vec)
    IP_time = T.time() - start_time
    IP_type = type(IP)

    total_IP_time = (num_rows * num_cols * IP_time /
        _parallel.get_num_procs())
    vecs_per_proc = self.max_vecs_per_node * _parallel.get_num_nodes() / \
        _parallel.get_num_procs()
    num_gets = (num_rows*num_cols) / ((vecs_per_proc-2) *
        _parallel.get_num_procs()**2) + num_rows/_parallel.get_num_procs()
    total_get_time = num_gets * get_time
    self.print_msg('Computing the inner product matrix will take at least '
        '%.1f minutes' % ((total_IP_time + total_get_time) / 60.))
    del row_vec, col_vec

    # To find all of the inner product mat chunks, each processor has a
    # full IP_mat of size num_rows x num_cols, even though each processor
    # is not responsible for filling in all of these entries. After each
    # proc fills in what it is responsible for, the other entries remain
    # 0's. Then an allreduce is done and all the IP_mats are summed. This
    # is simpler than concatenating chunks of the IP_mats. Efficiency is
    # not an issue; the mats are small compared to the vecs for large data.
    IP_mat = N.mat(N.zeros((num_rows, num_cols), dtype=IP_type))
    for row_get_index in xrange(num_row_get_loops):
        if len(row_tasks[rank]) > 0:
            start_row_index = min(row_tasks[rank][0] +
                row_get_index*num_rows_per_proc_chunk,
                row_tasks[rank][-1]+1)
            end_row_index = min(row_tasks[rank][-1]+1,
                start_row_index + num_rows_per_proc_chunk)
            row_vecs = [row_vec_handle.get() for row_vec_handle in
                row_vec_handles[start_row_index:end_row_index]]
        else:
            row_vecs = []

        for col_get_index in xrange(num_col_get_loops):
            if len(col_tasks[rank]) > 0:
                start_col_index = min(col_tasks[rank][0] +
                    col_get_index*num_cols_per_proc_chunk,
                    col_tasks[rank][-1]+1)
                end_col_index = min(col_tasks[rank][-1]+1,
                    start_col_index + num_cols_per_proc_chunk)
            else:
                start_col_index = 0
                end_col_index = 0

            # Cycle the col vecs to proc with rank -> mod(rank+1, num_procs).
            # Must do this for each processor, until data makes a circle.
            col_vecs_recv = (None, None)
            col_indices = range(start_col_index, end_col_index)
            for pass_index in xrange(_parallel.get_num_procs()):
                # If on the first pass, get the col vecs, no send/recv.
                # This is all that is called when in serial; the loop
                # iterates once.
                if pass_index == 0:
                    col_vecs = [col_handle.get() for col_handle in
                        col_vec_handles[start_col_index:end_col_index]]
                else:
                    # Determine with whom to communicate
                    dest = (rank + 1) % _parallel.get_num_procs()
                    source = (rank - 1) % _parallel.get_num_procs()

                    # Create unique tag based on send/recv ranks
                    send_tag = rank * \
                        (_parallel.get_num_procs() + 1) + dest
                    recv_tag = source * \
                        (_parallel.get_num_procs() + 1) + rank

                    # Collect data and send/receive
                    col_vecs_send = (col_vecs, col_indices)
                    request = _parallel.comm.isend(
                        col_vecs_send, dest=dest, tag=send_tag)
                    col_vecs_recv = _parallel.comm.recv(
                        source=source, tag=recv_tag)
                    request.Wait()
                    _parallel.barrier()
                    col_indices = col_vecs_recv[1]
                    col_vecs = col_vecs_recv[0]

                # Compute the IPs for this set of data. col_indices stores
                # the indices of the IP_mat columns to be filled in.
                if len(row_vecs) > 0:
                    for row_index in xrange(start_row_index,
                        end_row_index):
                        for col_vec_index, col_vec in enumerate(col_vecs):
                            IP_mat[row_index, col_indices[
                                col_vec_index]] = self.inner_product(
                                row_vecs[row_index - start_row_index],
                                col_vec)
                    if (T.time() - self.prev_print_time) > \
                        self.print_interval:
                        num_completed_IPs = (N.abs(IP_mat)>0).sum()
                        percent_completed_IPs = (100. * num_completed_IPs*
                            _parallel.get_num_MPI_workers()) / (
                            num_cols*num_rows)
                        self.print_msg(('Completed %.1f%% of inner ' +
                            'products')%percent_completed_IPs, sys.stderr)
                        self.prev_print_time = T.time()

            # Clear the retrieved column vecs after this pass cycle is done.
            del col_vecs

        # Completed a chunk of rows and all columns on all processors.
        del row_vecs

    # Assign these chunks into IP_mat.
    if _parallel.is_distributed():
        IP_mat = _parallel.custom_comm.allreduce(IP_mat)

    if transpose:
        IP_mat = IP_mat.T

    percent_completed_IPs = 100.
    self.print_msg(('Completed %.1f%% of inner ' +
        'products')%percent_completed_IPs, sys.stderr)
    self.prev_print_time = T.time()

    _parallel.barrier()
    return IP_mat
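# Serial sketch of the Gram matrix this method distributes (illustrative):
# IP_mat[i, j] = <row_i, col_j>. The transpose trick computes the smaller
# orientation and flips back, since cost scales with the column count.
import numpy as np

def inner_product_mat(rows, cols, inner_product=np.dot):
    transpose = len(rows) > len(cols)
    if transpose:
        rows, cols = cols, rows
    G = np.array([[inner_product(r, c) for c in cols] for r in rows])
    return G.T if transpose else G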
def threshold_gate(fdarray, thresholds, relative_to=None, unapplied=True,
                   progress=True):
    """
    unapplied - Include a setting where a threshold gate is not applied on
        a channel, e.g., if gating on CD45 and CD34, include a combination
        where only CD34 is gated on.
    """
    # Standardize input
    fdarray = util.make_list(fdarray)

    # Build a list of coordinates to iterate over based on the provided
    # thresholds.
    coords = []
    keys = list(thresholds.keys())
    # Number of populations that will be considered
    npop = 1
    for key in keys:
        nsplits = len(thresholds[key]) + 1
        if unapplied is True:
            coords.append(range(-1, nsplits))
            npop *= nsplits + 1
        else:
            coords.append(range(0, nsplits))
            npop *= nsplits

    # Pre-compute action of thresholds (saves about 40%)
    all_coord_splits = []
    for fd in fdarray:
        coord_splits = []
        for key, coord in zip(keys, coords):
            splits = []
            for c in coord:
                cuts = thresholds[key]
                if c == 0:
                    val = (fd[key] < cuts[0])
                elif c == len(cuts):
                    val = (fd[key] > cuts[-1])
                elif c > 0:
                    val = (fd[key] > cuts[c - 1]) & (fd[key] < cuts[c])
                if c >= 0:
                    splits.append(val)
            splits = np.vstack(splits)
            coord_splits.append(splits)
        all_coord_splits.append(coord_splits)

    # Similarly precompute the baseline number
    if relative_to is None:
        baselines = [fd.shape[0] for fd in fdarray]
    else:
        baselines = [np.sum(fd[relative_to]) for fd in fdarray]

    # Iterate over every permutation
    row_names = []
    rows = []
    scores = []
    product_iter = itertools.product(*coords)
    if progress:
        product_iter = progress_bar(product_iter, expected_size=npop,
                                    update_every=500)
    for coord in product_iter:
        string_tmp = ''
        # Construct the name of the current set
        for key, c in zip(keys, coord):
            cuts = thresholds[key]
            if len(cuts) == 1:
                if c == 0:
                    string_tmp += '/{}- '.format(key)
                elif c == 1:
                    string_tmp += '/{}+ '.format(key)
            elif len(cuts) == 2:
                if c == 0:
                    string_tmp += '/{}- '.format(key)
                elif c == 1:
                    string_tmp += '/{} lo'.format(key)
                elif c == 2:
                    string_tmp += '/{} hi'.format(key)

        # Now build the counts for each set
        row = np.zeros((len(fdarray), ))
        for j, (baseline, fd, coord_splits) in enumerate(
                zip(baselines, fdarray, all_coord_splits)):
            val = np.ones((fd.shape[0], ), dtype=bool)
            for c, splits in zip(coord, coord_splits):
                if c >= 0:
                    val *= splits[c]
            count = np.sum(val)
            row[j] = count / baseline

        # Now see if the row we have gathered is interesting.
        # Every non-empty/non-constant row will be included.
        score = ((np.max(row) - np.min(row)) / np.max(row)) * np.mean(row)
        if score > 0:
            row_names.append(string_tmp)
            rows.append(row)
            scores.append(score)

    # Rank rows
    scores = np.array(scores)
    I = np.argsort(-scores)
    rows = np.vstack(rows).T
    rows = rows[:, I]
    row_names = [row_names[j] for j in I]
    return rows, row_names
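# Minimal sketch of threshold gating on one channel (illustrative data):
# a cut list of length 1 splits events into "-" and "+" populations via
# boolean masks, exactly the mechanism precomputed above.
import numpy as np

values = np.array([0.1, 2.5, 0.3, 4.0, 1.9])   # one channel, 5 events
cut = 1.0
neg = values < cut                  # the "/CD34- " population
pos = values > cut                  # the "/CD34+ " population
fractions = [np.mean(neg), np.mean(pos)]       # counts relative to baseline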
def __init__(self, tree):
    self.expression = self.build(tree.expression)
    self.cases = [self.build(i) for i in make_list(tree.switch_cases)]
def compute_decomp(self, vec_handles, adv_vec_handles=None):
    """Computes decomposition and returns eigen decomposition matrices.

    Args:
        ``vec_handles``: List of handles for the vectors.

    Kwargs:
        ``adv_vec_handles``: List of handles of ``vecs`` advanced in time.
            If not provided, it is assumed that the vectors are a
            sequential time-series. Thus ``vec_handles`` becomes
            ``vec_handles[:-1]`` and ``adv_vec_handles`` becomes
            ``vec_handles[1:]``.

    Returns:
        ``ritz_vals``: 1D array of Ritz values.

        ``mode_norms``: 1D array of mode norms.

        ``build_coeffs``: Matrix of build coefficients for modes.
    """
    if vec_handles is not None:
        self.vec_handles = util.make_list(vec_handles)
    if self.vec_handles is None:
        raise util.UndefinedError('vec_handles is not given')
    if adv_vec_handles is not None:
        self.adv_vec_handles = util.make_list(adv_vec_handles)
        if len(self.vec_handles) != len(self.adv_vec_handles):
            raise ValueError(('Number of vec_handles and adv_vec_handles'
                ' is not equal.'))

    # For a sequential dataset, compute the correlation mat for all
    # vectors. This is more efficient because only one call is made to
    # the inner product routine, even though we don't need the last
    # row/column yet. Later we need all but the last element of the last
    # column, so it is faster to compute all of this now. Only one extra
    # element is computed, since this is a symmetric inner product
    # matrix. Then slice the expanded correlation matrix accordingly.
    if adv_vec_handles is None:
        self.expanded_correlation_mat =\
            self.vec_space.compute_symmetric_inner_product_mat(
            self.vec_handles)
        self.correlation_mat = self.expanded_correlation_mat[:-1, :-1]
    # For non-sequential data, compute the correlation matrix from the
    # unadvanced snapshots only.
    else:
        self.correlation_mat = \
            self.vec_space.compute_symmetric_inner_product_mat(
            self.vec_handles)

    # Compute eigendecomposition of correlation matrix
    self.correlation_mat_evals, self.correlation_mat_evecs = \
        _parallel.call_and_bcast(util.eigh, self.correlation_mat,
        is_positive_definite=True)
    correlation_mat_evals_sqrt = N.mat(
        N.diag(self.correlation_mat_evals**-0.5))

    # Compute low-order linear map for a sequential snapshot set. This
    # takes advantage of the fact that for a sequential dataset, the
    # unadvanced and advanced vectors overlap.
    if self.adv_vec_handles is None:
        self.low_order_linear_map = correlation_mat_evals_sqrt *\
            self.correlation_mat_evecs.H *\
            self.expanded_correlation_mat[:-1, 1:] *\
            self.correlation_mat_evecs * correlation_mat_evals_sqrt
    # Compute low-order linear map for a non-sequential snapshot set
    else:
        self.low_order_linear_map = correlation_mat_evals_sqrt *\
            self.correlation_mat_evecs.H *\
            self.vec_space.compute_inner_product_mat(self.vec_handles,
            self.adv_vec_handles) * self.correlation_mat_evecs *\
            correlation_mat_evals_sqrt

    # Compute eigendecomposition of low-order linear map, finish DMD
    # computation.
    self._compute_eigen_decomp()
    if (self.mode_norms < 0).any() and self.verbosity > 0 and \
        _parallel.is_rank_zero():
        print >> output_channel, ('Warning: mode norms have negative '
            'values. This often happens '
            'when the rank of the vector matrix is much less than the '
            'number of columns. Try using fewer vectors (fewer columns).')
    return self.ritz_vals, self.mode_norms, self.build_coeffs
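# Numpy sketch of the low-order DMD map built above (sequential case,
# illustrative data): with snapshots as columns of X, the expanded
# correlation matrix is X^H X, its eigendecomposition gives U and E, and
#   A_low = E^{-1/2} U^H (X^H X)[:-1, 1:] U E^{-1/2},
# whose eigenvalues are the Ritz values.
import numpy as np

X = np.random.rand(50, 9)                        # 9 sequential snapshots
corr = np.dot(X.T, X)                            # expanded correlation mat
evals, evecs = np.linalg.eigh(corr[:-1, :-1])
evals_isqrt = np.diag(evals**-0.5)
low_order_map = evals_isqrt.dot(evecs.T).dot(
    corr[:-1, 1:]).dot(evecs).dot(evals_isqrt)
ritz_vals, low_order_evecs = np.linalg.eig(low_order_map)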
def scandb_snps(snps, pval):
    snps = make_list(snps)
    params = urllib.urlencode({
        'list': ",".join(snps),
        'snpinfo': 1,
        'expr': 1,
        'pval': pval,
        'output': 'tab'
    })
    headers = {
        "Content-type": "application/x-www-form-urlencoded",
        "Accept": "text/plain"
    }

    # Submit query to ScanDB.
    try:
        conn = httplib.HTTPConnection(SCANDB_HOST, timeout=CON_TIMEOUT)
        conn.request("POST", "/newinterface/snpinfo.php", params, headers)
        response = conn.getresponse()
        data = response.read()
        conn.close()
    except:
        print >> sys.stderr, "Error: query to ScanDB failed. The message was:"
        print >> sys.stderr, str(sys.exc_info()[1])
        print >> sys.stderr, ("ScanDB itself may be down, or your internet "
                              "connection may have failed.")
        conn.close()
        return []  # exit

    # If the response status wasn't OK, there's something wrong. Bail out.
    if response.status != 200:
        print >> sys.stderr, "Error: query to ScanDB failed. The response was:"
        print >> sys.stderr, "%s %s" % (str(response.status),
                                        str(response.reason))
        return []
    # If the query itself failed for some reason, bail out.
    elif data is None:
        return []  # exit

    # Checks passed - let's parse our data.
    parsed = eQTLSet()
    for line in data.split("\n")[1:]:
        if line == "":
            continue
        e = line.split("\t")
        snp = e[0]
        for q in e[6].split(":"):
            qs = q.split()
            if qs[0] != "NA":
                eqtl = eQTL(snp, qs[0])
                eqtl.population = qs[1]
                eqtl.source = "SCAN"
                eqtl.organism = "Homo sapiens"
                eqtl.tissue = "LCL"
                # Be careful in casting the p-value: if it's not a float,
                # it could crash the whole program.
                try:
                    eqtl.pval = float(qs[2])
                except:
                    eqtl.pval = "NA"
                parsed.add(eqtl)
    return parsed
def __init__(self, tree):
    self.type = __init__.ASTType.ASTType(tree.type)
    self.dimensions = [self.build(i) for i in make_list(tree.dimensions)]
    self.initializer = self.build(tree.initializer)
def tsne(fdarray, new_label='tsne', channels=None, transform='arcsinh',
         sample=6000, verbose=False, backgate=True):
    """Perform t-SNE/viSNE on the FlowData object."""
    fdarray = util.make_list(fdarray)

    # If the user has not provided a list of channels to use,
    # use the intersection of all isotope channels.
    if channels is None:
        channel_set = []
        for fd in fdarray:
            channel_set.append(set(fd.isotopes))
        channels = list(set.intersection(*channel_set))

    # Make a copy of the data in files that we want.
    points = []
    for fd in fdarray:
        points.append(np.vstack([fd[ch] for ch in channels]).T)

    # transform
    if transform == 'arcsinh':
        for pts in points:
            # Apply the transform in place to the data
            np.arcsinh(5 * pts, pts)

    # Randomly sample to reduce the number of points.
    sample_masks = []
    for pts in points:
        if sample < pts.shape[0]:
            # If we have enough points to subsample
            sample_masks.append(np.random.choice(pts.shape[0], sample,
                                                 replace=False))
        else:
            # Otherwise we add all the points
            sample_masks.append(np.array(range(pts.shape[0])))

    # Sample the points, and construct a large matrix.
    sample_points = []
    for mask, pts in zip(sample_masks, points):
        sample_points.append(pts[mask, :])
    X = np.vstack(sample_points)

    # Perform t-SNE
    Y = lib_tsne.tsne(X, verbose=verbose)
    assert Y is not None, 't-SNE failed to return'

    # Split Y into a matrix for each dataset.
    splits = np.cumsum(np.array([mask.shape[0] for mask in sample_masks],
                                dtype=int))
    Y_split = np.split(Y, splits, axis=0)

    # Now expand data to reassign these points back into the dataset.
    tsne_coords = []
    for (pts, mask, Yspt) in zip(points, sample_masks, Y_split):
        npoints = pts.shape[0]
        Z = np.zeros((npoints, 2)) * float('NaN')
        Z[mask, :] = Yspt
        tsne_coords.append(Z)

    # If a point didn't get sampled, place its t-SNE coordinates at its
    # nearest neighbor.
    if backgate:
        kd = KDTree(X)
        # Select points not assigned values with t-SNE.
        for pts, mask, coords, j in zip(points, sample_masks, tsne_coords,
                                        range(len(points))):
            nan_points = np.argwhere(np.isnan(coords[:, 0]))
            d, near = kd.query(pts[nan_points], 1)
            # Convert back to coordinates on the whole dataset.
            coords[nan_points, :] = Y[near, :]
            tsne_coords[j] = coords

    # Add the data to the FlowData structure.
    for fd, coords in zip(fdarray, tsne_coords):
        fd[new_label + '1'] = coords[:, 0]
        fd[new_label + '2'] = coords[:, 1]
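# Sketch of the backgating step above (illustrative data): events left out
# of the t-SNE subsample inherit the embedding of their nearest sampled
# neighbor in the original high-dimensional space.
import numpy as np
from scipy.spatial import KDTree

X_sampled = np.random.rand(100, 5)      # points given t-SNE coordinates
Y_embedded = np.random.rand(100, 2)     # their 2-D embedding
X_rest = np.random.rand(20, 5)          # points that were not sampled

kd = KDTree(X_sampled)
_, nearest = kd.query(X_rest, 1)        # index of closest sampled point
Y_rest = Y_embedded[nearest, :]         # borrowed embedding coordinates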
def __init__(self, tree):
    self.cases = [self.build(i) for i in make_list(tree.cases)]
    self.body = [self.build(i) for i in make_list(tree.body)]
def compute_symmetric_inner_product_mat(self, vec_handles): """Computes an upper-triangular symmetric matrix of inner products. Args: ``vec_handles``: List of vector handles. Returns: ``IP_mat``: Numpy array of inner products. See the documentation for :py:meth:`compute_inner_product_mat` for an idea how this works. TODO: JON, write detailed documentation similar to :py:meth:`compute_inner_product_mat`. """ self._check_inner_product() vec_handles = util.make_list(vec_handles) num_vecs = len(vec_handles) # num_cols_per_chunk is the number of cols each proc gets at once. # Columns are retrieved if the matrix must be broken up into sets of # chunks. Then symmetric upper triangular portions will be computed, # followed by a rectangular piece that uses columns not already in # memory. num_cols_per_proc_chunk = 1 num_rows_per_proc_chunk = self.max_vecs_per_proc -\ num_cols_per_proc_chunk # <nprocs> chunks are computed simulaneously, making up a set. num_cols_per_chunk = num_cols_per_proc_chunk * _parallel.get_num_procs( ) num_rows_per_chunk = num_rows_per_proc_chunk * _parallel.get_num_procs( ) # <num_row_chunks> is the number of sets that must be computed. num_row_chunks = int(N.ceil(num_vecs * 1. / num_rows_per_chunk)) if num_row_chunks > 1: self.print_msg( 'Warning: The vecs, of which ' 'there are %d, will be retrieved %d times each. Increase ' 'number of nodes or max_vecs_per_node to reduce redundant ' '"get"s for a speedup.' % (num_vecs, num_row_chunks)) # Estimate the time this will take and determine matrix datatype # (real or complex). test_vec = vec_handles[0].get() # Burn the first, it sometimes contains slow imports IP_burn = self.inner_product(test_vec, test_vec) start_time = T.time() test_vec = vec_handles[0].get() get_time = T.time() - start_time start_time = T.time() IP = self.inner_product(test_vec, test_vec) IP_time = T.time() - start_time IP_type = type(IP) total_IP_time = (num_vecs**2 * IP_time / 2. / _parallel.get_num_procs()) vecs_per_proc = self.max_vecs_per_node * _parallel.get_num_nodes() / \ _parallel.get_num_procs() num_gets = (num_vecs**2 /2.) / ((vecs_per_proc-2) * _parallel.get_num_procs()**2) + \ num_vecs/_parallel.get_num_procs()/2. total_get_time = num_gets * get_time self.print_msg('Computing the inner product matrix will take at least ' '%.1f minutes' % ((total_IP_time + total_get_time) / 60.)) del test_vec # Use the same trick as in compute_IP_mat, having each proc # fill in elements of a num_rows x num_rows sized matrix, rather than # assembling small chunks. This is done for the triangular portions. # For the rectangular portions, the inner product mat is filled # in directly. 
IP_mat = N.mat(N.zeros((num_vecs, num_vecs), dtype=IP_type)) for start_row_index in xrange(0, num_vecs, num_rows_per_chunk): end_row_index = min(num_vecs, start_row_index + num_rows_per_chunk) proc_row_tasks_all = _parallel.find_assignments( range(start_row_index, end_row_index)) num_active_procs = len([task for task in \ proc_row_tasks_all if task != []]) proc_row_tasks = proc_row_tasks_all[_parallel.get_rank()] if len(proc_row_tasks) != 0: row_vecs = [ vec_handle.get() for vec_handle in vec_handles[proc_row_tasks[0]:proc_row_tasks[-1] + 1] ] else: row_vecs = [] # Triangular chunks if len(proc_row_tasks) > 0: # Test that indices are consecutive if proc_row_tasks[0:] != range(proc_row_tasks[0], proc_row_tasks[-1] + 1): raise ValueError('Indices are not consecutive.') # Per-processor triangles (using only vecs in memory) for row_index in xrange(proc_row_tasks[0], proc_row_tasks[-1] + 1): # Diagonal term IP_mat[row_index, row_index] = self.\ inner_product(row_vecs[row_index - proc_row_tasks[ 0]], row_vecs[row_index - proc_row_tasks[0]]) # Off-diagonal terms for col_index in xrange(row_index + 1, proc_row_tasks[-1] + 1): IP_mat[row_index, col_index] = self.\ inner_product(row_vecs[row_index -\ proc_row_tasks[0]], row_vecs[col_index -\ proc_row_tasks[0]]) # Number of square chunks to fill in is n * (n-1) / 2. At each # iteration we fill in n of them, so we need (n-1) / 2 # iterations (round up). for set_index in xrange(int(N.ceil((num_active_procs - 1.) / 2))): # The current proc is "sender" my_rank = _parallel.get_rank() my_row_indices = proc_row_tasks my_num_rows = len(my_row_indices) # The proc to send to is "destination" dest_rank = (my_rank + set_index + 1) % num_active_procs # This is unused? #dest_row_indices = proc_row_tasks_all[dest_rank] # The proc that data is received from is the "source" source_rank = (my_rank - set_index - 1) % num_active_procs # Find the maximum number of sends/recv to be done by any proc max_num_to_send = int(N.ceil(1. * max([len(tasks) for \ tasks in proc_row_tasks_all]) /\ num_cols_per_proc_chunk)) """ # Pad tasks with nan so that everyone has the same # number of things to send. Same for list of vecs with None. # The empty lists will not do anything when enumerated, so no # inner products will be taken. nan is inserted into the # indices because then min/max of the indices can be taken. if my_num_rows != len(row_vecs): raise ValueError('Number of rows assigned does not ' +\ 'match number of vecs in memory.') if my_num_rows > 0 and my_num_rows < max_num_to_send: my_row_indices += [N.nan] * (max_num_to_send - my_num_rows) row_vecs += [[]] * (max_num_to_send - my_num_rows) """ for send_index in xrange(max_num_to_send): # Only processors responsible for rows communicate if my_num_rows > 0: # Send row vecs, in groups of num_cols_per_proc_chunk # These become columns in the ensuing computation start_col_index = send_index * num_cols_per_proc_chunk end_col_index = min( start_col_index + num_cols_per_proc_chunk, my_num_rows) col_vecs_send = ( row_vecs[start_col_index:end_col_index], my_row_indices[start_col_index:end_col_index]) # Create unique tags based on ranks send_tag = my_rank * (_parallel.get_num_procs() + 1) + dest_rank recv_tag = source_rank * (_parallel.get_num_procs() + 1) + my_rank # Send and receieve data. The Wait() command after the # receive prevents a race condition not fixed by sync(). # The Wait() is very important for the non- # blocking send (though we are unsure why). 
request = _parallel.comm.isend(col_vecs_send, dest=dest_rank, tag=send_tag) col_vecs_recv = _parallel.comm.recv(source=source_rank, tag=recv_tag) request.Wait() col_vecs = col_vecs_recv[0] my_col_indices = col_vecs_recv[1] for row_index in xrange(my_row_indices[0], my_row_indices[-1] + 1): for col_vec_index, col_vec in enumerate(col_vecs): IP_mat[row_index, my_col_indices[ col_vec_index]] = self.inner_product( row_vecs[row_index - my_row_indices[0]], col_vec) if (T.time() - self.prev_print_time) > \ self.print_interval: num_completed_IPs = (N.abs(IP_mat) > 0).sum() percent_completed_IPs = \ (100.*2*num_completed_IPs * \ _parallel.get_num_MPI_workers())/\ (num_vecs**2) self.print_msg( ('Completed %.1f%% of inner ' + 'products') % percent_completed_IPs, sys.stderr) self.prev_print_time = T.time() # Sync after send/receive _parallel.barrier() # Fill in the rectangular portion next to each triangle (if necessary). # Start at index after last row, continue to last column. This part # of the code is the same as in compute_inner_product_mat, as of # revision 141. for start_col_index in xrange(end_row_index, num_vecs, num_cols_per_chunk): end_col_index = min(start_col_index + num_cols_per_chunk, num_vecs) proc_col_tasks = _parallel.find_assignments( range(start_col_index, end_col_index))[_parallel.get_rank()] # Pass the col vecs to proc with rank -> mod(rank+1,numProcs) # Must do this for each processor, until data makes a circle col_vecs_recv = (None, None) if len(proc_col_tasks) > 0: col_indices = range(proc_col_tasks[0], proc_col_tasks[-1] + 1) else: col_indices = [] for num_passes in xrange(_parallel.get_num_procs()): # If on the first pass, get the col vecs, no send/recv # This is all that is called when in serial, loop iterates # once. if num_passes == 0: if len(col_indices) > 0: col_vecs = [col_handle.get() \ for col_handle in vec_handles[col_indices[0]:\ col_indices[-1] + 1]] else: col_vecs = [] else: # Determine whom to communicate with dest = (_parallel.get_rank() + 1) % _parallel.\ get_num_procs() source = (_parallel.get_rank() - 1) % _parallel.\ get_num_procs() # Create unique tag based on ranks send_tag = _parallel.get_rank() * (_parallel.\ get_num_procs() + 1) + dest recv_tag = source*(_parallel.get_num_procs() + 1) +\ _parallel.get_rank() # Collect data and send/receive col_vecs_send = (col_vecs, col_indices) request = _parallel.comm.isend(col_vecs_send, dest=\ dest, tag=send_tag) col_vecs_recv = _parallel.comm.recv(source=source, tag=recv_tag) request.Wait() _parallel.barrier() col_indices = col_vecs_recv[1] col_vecs = col_vecs_recv[0] # Compute the IPs for this set of data. col_indices stores # the indices of the IP_mat columns to be # filled in. if len(proc_row_tasks) > 0: for row_index in xrange(proc_row_tasks[0], proc_row_tasks[-1] + 1): for col_vec_index, col_vec in enumerate(col_vecs): IP_mat[row_index, col_indices[ col_vec_index]] = self.inner_product( row_vecs[row_index - proc_row_tasks[0]], col_vec) if (T.time() - self.prev_print_time) > self.print_interval: num_completed_IPs = (N.abs(IP_mat) > 0).sum() percent_completed_IPs = ( 100. * 2 * num_completed_IPs * _parallel.get_num_MPI_workers()) / (num_vecs** 2) self.print_msg( ('Completed %.1f%% of inner ' + 'products') % percent_completed_IPs, sys.stderr) self.prev_print_time = T.time() # Completed a chunk of rows and all columns on all processors. # Finished the row_vecs loop; delete the memory used del row_vecs # Assign the triangular portion chunks into IP_mat.
if _parallel.is_distributed(): IP_mat = _parallel.custom_comm.allreduce(IP_mat) # Create a mask for the repeated values. Select values that are zero # in the upper triangular portion (not computed there) but nonzero in # the lower triangular portion (computed there). For the case where # the inner product is not perfectly symmetric, this will select the # computation done in the upper triangular portion. mask = N.multiply(IP_mat == 0, IP_mat.T != 0) # Collect values below diagonal IP_mat += N.multiply(N.triu(IP_mat.T, 1), mask) # Symmetrize matrix IP_mat = N.triu(IP_mat) + N.triu(IP_mat, 1).T percent_completed_IPs = 100. self.print_msg(('Completed %.1f%% of inner ' + 'products') % percent_completed_IPs, sys.stderr) self.prev_print_time = T.time() _parallel.barrier() return IP_mat
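# A standalone sketch of the symmetrization step above, assuming only numpy:
# compute just the upper triangle of the inner product matrix, then mirror it
# with triu, exactly as IP_mat = N.triu(IP_mat) + N.triu(IP_mat, 1).T does.
# The 4x3 random data is made up for illustration.
import numpy as np
vecs = np.random.randn(4, 3)               # 4 vectors of dimension 3
IP = np.zeros((4, 4))
for i in range(4):
    for j in range(i, 4):                  # only the upper triangle is computed
        IP[i, j] = np.dot(vecs[i], vecs[j])
IP_full = np.triu(IP) + np.triu(IP, 1).T   # mirror above the diagonal
assert np.allclose(IP_full, vecs.dot(vecs.T))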
def __init__(self, tree): self.contents = [self.build(i) for i in make_list(tree.statements)]
def tsne(fdarray, new_label='tsne', channels=None, transform='arcsinh', sample=6000, verbose=False, backgate=True): """Perform t-SNE/viSNE on the FlowData object """ fdarray = util.make_list(fdarray) # If the user has not provided a list of channels to use, # use the intersection of all isotope channels if channels is None: channel_set = [] for fd in fdarray: channel_set.append(set(fd.isotopes)) channels = list(set.intersection(*channel_set)) # Make a copy of the data in files that we want points = [] for fd in fdarray: points.append(np.vstack([fd[ch] for ch in channels]).T) # transform if transform == 'arcsinh': for pts in points: # Apply the transform inplace to the data np.arcsinh(5 * pts, pts) # Randomly sample to reduce the number of points sample_masks = [] for pts in points: if sample < pts.shape[0]: # If we have enough points to subsample sample_masks.append( np.random.choice(pts.shape[0], sample, replace=False)) else: # Otherwise we add all the points sample_masks.append(np.array(range(pts.shape[0]))) # Sample the points, and construct a large matrix sample_points = [] for mask, pts in zip(sample_masks, points): sample_points.append(pts[mask, :]) X = np.vstack(sample_points) # Perform t-SNE Y = lib_tsne.tsne(X, verbose=verbose) assert Y is not None, ('t-SNE failed to return') # Split Y into a matrix for each dataset splits = np.cumsum( np.array([mask.shape[0] for mask in sample_masks], dtype=int)) Y_split = np.split(Y, splits, axis=0) # now expand data to reassign these points back into the dataset tsne_coords = [] for (pts, mask, Yspt) in zip(points, sample_masks, Y_split): npoints = pts.shape[0] Z = np.zeros((npoints, 2)) * float('NaN') Z[mask, :] = Yspt tsne_coords.append(Z) # If a point didn't get sampled, place its t-SNE coordinates at its nearest # neighbor. if backgate: kd = KDTree(X) # select points not assigned values with t-SNE for pts, mask, coords, j in zip(points, sample_masks, tsne_coords, range(len(points))): nan_points = np.argwhere(np.isnan(coords[:, 0])) d, near = kd.query(pts[nan_points], 1) # convert back to coordinates on the whole dataset coords[nan_points, :] = Y[near, :] tsne_coords[j] = coords # add to data to FlowData structure for fd, coords in zip(fdarray, tsne_coords): fd[new_label + '1'] = coords[:, 0] fd[new_label + '2'] = coords[:, 1]
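# A runnable sketch of the backgating step in tsne above, assuming scipy is
# available: points that were not subsampled borrow the t-SNE coordinates of
# their nearest sampled neighbor via a KD-tree. The toy arrays (and the random
# Y standing in for the lib_tsne.tsne output) are fabricated for illustration.
import numpy as np
from scipy.spatial import KDTree
points = np.random.randn(100, 5)                   # full dataset
mask = np.random.choice(100, 60, replace=False)    # sampled row indices
X = points[mask]                                   # what t-SNE actually saw
Y = np.random.randn(60, 2)                         # stand-in for the t-SNE output
coords = np.full((100, 2), np.nan)
coords[mask] = Y                                   # sampled points keep their coords
nan_rows = np.argwhere(np.isnan(coords[:, 0])).ravel()
_, near = KDTree(X).query(points[nan_rows], 1)     # nearest sampled neighbor
coords[nan_rows] = Y[near]                         # borrow its embedding
assert not np.isnan(coords).any()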
def compute_inner_product_mat(self, row_vec_handles, col_vec_handles): """Computes the matrix of inner product combinations between vectors. Args: ``row_vec_handles``: List of row vector handles. For example BPOD adjoints, :math:`Y`. ``col_vec_handles``: List of column vector handles. For example BPOD directs, :math:`X`. Returns: ``IP_mat``: 2D array of inner products. The vecs are retrieved in memory-efficient chunks and are not all in memory at once. The row vecs and col vecs are assumed to be different. When they are the same, use :py:meth:`compute_symmetric_inner_product_mat` for a 2x speedup. Each MPI worker (processor) is responsible for retrieving a subset of the rows and columns. The processors then send/recv columns via MPI so they can be used to compute all IPs for the rows on each MPI worker. This is repeated until all MPI workers are done with all of their row chunks. If there are 2 processors:: | x o | rank0 | x o | | x o | - | o x | rank1 | o x | | o x | In the next step, rank 0 sends column 0 to rank 1 and rank 1 sends column 1 to rank 0. The remaining IPs are filled in:: | x x | rank0 | x x | | x x | - | x x | rank1 | x x | | x x | When the number of cols and rows is not divisible by the number of processors, the processors are assigned unequal numbers of tasks. However, all processors are always part of the passing cycle. The scaling is: - num gets / processor ~ :math:`(n_r*n_c/((max-2)*n_p*n_p)) + n_r/n_p` - num MPI sends / processor ~ :math:`(n_p-1)*(n_r/((max-2)*n_p))*n_c/n_p` - num inner products / processor ~ :math:`n_r*n_c/n_p` where :math:`n_r` is number of rows, :math:`n_c` number of columns, :math:`max` is ``max_vecs_per_proc = max_vecs_per_node/num_procs_per_node``, and :math:`n_p` is the number of MPI workers (processors). If there are more rows than columns, then an internal transpose and un-transpose is performed to improve efficiency (since :math:`n_c` only appears in the scaling in the quadratic term). """ self._check_inner_product() row_vec_handles = util.make_list(row_vec_handles) col_vec_handles = util.make_list(col_vec_handles) num_cols = len(col_vec_handles) num_rows = len(row_vec_handles) if num_rows > num_cols: transpose = True temp = row_vec_handles row_vec_handles = col_vec_handles col_vec_handles = temp temp = num_rows num_rows = num_cols num_cols = temp else: transpose = False # convenience rank = _parallel.get_rank() ## Old way that worked # num_cols_per_proc_chunk is the number of cols each proc gets at once num_cols_per_proc_chunk = 1 num_rows_per_proc_chunk = self.max_vecs_per_proc - \ num_cols_per_proc_chunk # Determine how the retrieving and inner products will be split up. row_tasks = _parallel.find_assignments(range(num_rows)) col_tasks = _parallel.find_assignments(range(num_cols)) # Find max number of row and col tasks among all processors max_num_row_tasks = max([len(tasks) for tasks in row_tasks]) max_num_col_tasks = max([len(tasks) for tasks in col_tasks]) ## New way #if self.max_vecs_per_node > max_num_row_tasks: # num_cols_per_proc_chunk = #num_rows_per_proc_chunk = self.max_vecs_per_proc - \ # num_cols_per_proc_chunk # These variables are the number of iters through loops that retrieve # ("get") row and column vecs. num_row_get_loops = \ int(N.ceil(max_num_row_tasks*1./num_rows_per_proc_chunk)) num_col_get_loops = \ int(N.ceil(max_num_col_tasks*1./num_cols_per_proc_chunk)) if num_row_get_loops > 1: self.print_msg( 'Warning: The column vecs, of which ' 'there are %d, will be retrieved %d times each. Increase '
'number of nodes or max_vecs_per_node to reduce redundant ' '"get"s for a speedup.' % (num_cols, num_row_get_loops)) # Estimate the time this will take and determine matrix datatype # (real or complex). row_vec = row_vec_handles[0].get() col_vec = col_vec_handles[0].get() # Burn the first inner product; it sometimes includes slow imports IP_burn = self.inner_product(row_vec, col_vec) start_time = T.time() row_vec = row_vec_handles[0].get() get_time = T.time() - start_time start_time = T.time() IP = self.inner_product(row_vec, col_vec) IP_time = T.time() - start_time IP_type = type(IP) total_IP_time = (num_rows * num_cols * IP_time / _parallel.get_num_procs()) vecs_per_proc = self.max_vecs_per_node * _parallel.get_num_nodes() / \ _parallel.get_num_procs() num_gets = (num_rows * num_cols) / ( (vecs_per_proc - 2) * _parallel.get_num_procs()** 2) + num_rows / _parallel.get_num_procs() total_get_time = num_gets * get_time self.print_msg('Computing the inner product matrix will take at least ' '%.1f minutes' % ((total_IP_time + total_get_time) / 60.)) del row_vec, col_vec # To find all of the inner product mat chunks, each # processor has a full IP_mat with size # num_rows x num_cols even though each processor is not responsible for # filling in all of these entries. After each proc fills in what it is # responsible for, the other entries remain 0's. Then, an allreduce # is done and all the IP_mats are summed. This is simpler than # concatenating chunks of the IP_mats. # Efficiency is not an issue; the mats # are small compared to the vecs for large data. IP_mat = N.mat(N.zeros((num_rows, num_cols), dtype=IP_type)) for row_get_index in xrange(num_row_get_loops): if len(row_tasks[rank]) > 0: start_row_index = min( row_tasks[rank][0] + row_get_index * num_rows_per_proc_chunk, row_tasks[rank][-1] + 1) end_row_index = min(row_tasks[rank][-1] + 1, start_row_index + num_rows_per_proc_chunk) row_vecs = [ row_vec_handle.get() for row_vec_handle in row_vec_handles[start_row_index:end_row_index] ] else: row_vecs = [] for col_get_index in xrange(num_col_get_loops): if len(col_tasks[rank]) > 0: start_col_index = min( col_tasks[rank][0] + col_get_index * num_cols_per_proc_chunk, col_tasks[rank][-1] + 1) end_col_index = min( col_tasks[rank][-1] + 1, start_col_index + num_cols_per_proc_chunk) else: start_col_index = 0 end_col_index = 0 # Cycle the col vecs to proc with rank -> mod(rank+1,num_procs) # Must do this for each processor, until data makes a circle col_vecs_recv = (None, None) col_indices = range(start_col_index, end_col_index) for pass_index in xrange(_parallel.get_num_procs()): #if rank==0: print 'starting pass index=',pass_index # If on the first pass, get the col vecs, no send/recv # This is all that is called when in serial, loop iterates # once.
if pass_index == 0: col_vecs = [ col_handle.get() for col_handle in col_vec_handles[start_col_index:end_col_index] ] else: # Determine with whom to communicate dest = (rank + 1) % _parallel.get_num_procs() source = (rank - 1) % _parallel.get_num_procs() # Create unique tag based on send/recv ranks send_tag = rank * \ (_parallel.get_num_procs() + 1) + dest recv_tag = source * \ (_parallel.get_num_procs() + 1) + rank # Collect data and send/receive col_vecs_send = (col_vecs, col_indices) request = _parallel.comm.isend(col_vecs_send, dest=dest, tag=send_tag) col_vecs_recv = _parallel.comm.recv(source=source, tag=recv_tag) request.Wait() _parallel.barrier() col_indices = col_vecs_recv[1] col_vecs = col_vecs_recv[0] # Compute the IPs for this set of data. col_indices stores # the indices of the IP_mat columns to be # filled in. if len(row_vecs) > 0: for row_index in xrange(start_row_index, end_row_index): for col_vec_index, col_vec in enumerate(col_vecs): IP_mat[row_index, col_indices[ col_vec_index]] = self.inner_product( row_vecs[row_index - start_row_index], col_vec) if (T.time() - self.prev_print_time) > \ self.print_interval: num_completed_IPs = (N.abs(IP_mat) > 0).sum() percent_completed_IPs = ( 100. * num_completed_IPs * _parallel.get_num_MPI_workers()) / (num_cols * num_rows) self.print_msg( ('Completed %.1f%% of inner ' + 'products') % percent_completed_IPs, sys.stderr) self.prev_print_time = T.time() # Clear the retrieved column vecs after this pass cycle is done del col_vecs # Completed a chunk of rows and all columns on all processors. del row_vecs # Assign these chunks into IP_mat. if _parallel.is_distributed(): IP_mat = _parallel.custom_comm.allreduce(IP_mat) if transpose: IP_mat = IP_mat.T percent_completed_IPs = 100. self.print_msg(('Completed %.1f%% of inner ' + 'products') % percent_completed_IPs, sys.stderr) self.prev_print_time = T.time() _parallel.barrier() return IP_mat
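# A serial toy model of the column-cycling used above, with no MPI required:
# each "rank" starts with its own column chunk and, after num_procs - 1 passes
# around the ring (rank -> rank + 1), every rank has seen every chunk, so it
# can form all inner products for its rows. The chunk contents are made up.
num_procs = 3
chunks = [['c%d' % r] for r in range(num_procs)]   # one column chunk per rank
seen = [list(c) for c in chunks]
for _ in range(num_procs - 1):
    # every rank sends its chunk to (rank + 1) % num_procs and receives
    # from (rank - 1) % num_procs, mirroring the isend/recv pair above
    chunks = [chunks[(r - 1) % num_procs] for r in range(num_procs)]
    for r in range(num_procs):
        seen[r] += chunks[r]
assert all(sorted(s) == ['c0', 'c1', 'c2'] for s in seen)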
def __init__(self, tree): self.elements = [self.build(i) for i in make_list(tree.elements)]
def lin_combine(self, sum_vec_handles, basis_vec_handles, coeff_mat, coeff_mat_col_indices=None): """Linearly combines the basis vecs and calls ``put`` on the result. Args: ``sum_vec_handles``: List of handles for the sum vectors. ``basis_vec_handles``: List of handles for the basis vecs. ``coeff_mat``: Matrix with rows corresponding to basis vecs and columns to sum (lin. comb.) vecs. The rows and columns correspond, by index, to the lists basis_vec_handles and sum_vec_handles. ``sums = basis * coeff_mat`` Kwargs: ``coeff_mat_col_indices``: List of column indices. Only the sum_vecs corresponding to these col indices are computed. Each processor retrieves a subset of the basis vecs to compute as many outputs as a processor can have in memory at once. Each processor computes the "layers" from the basis it is responsible for, and for as many modes as it can fit in memory. The layers from all procs are summed together to form the sum_vecs, and ``put`` is called on them. Scaling is: num gets/worker = :math:`n_s/(n_p*(max-2)) * n_b/n_p` passes/worker = :math:`(n_p-1) * n_s/(n_p*(max-2)) * (n_b/n_p)` scalar multiplies/worker = :math:`n_s*n_b/n_p` Where :math:`n_s` is number of sum vecs, :math:`n_b` is number of basis vecs, :math:`n_p` is number of processors, :math:`max` = ``max_vecs_per_node``. """ sum_vec_handles = util.make_list(sum_vec_handles) basis_vec_handles = util.make_list(basis_vec_handles) num_bases = len(basis_vec_handles) num_sums = len(sum_vec_handles) if coeff_mat_col_indices is not None: coeff_mat = coeff_mat[:, coeff_mat_col_indices] if num_bases != coeff_mat.shape[0]: raise ValueError(('Number of coeff_mat rows (%d) does not equal ' 'number of basis handles (%d)' % (coeff_mat.shape[0], num_bases))) if num_sums != coeff_mat.shape[1]: raise ValueError(('Number of coeff_mat cols (%d) does not equal ' 'number of output handles (%d)') % (coeff_mat.shape[1], num_sums)) # Estimate time it will take # Burn the first one for slow imports test_vec_burn = basis_vec_handles[0].get() test_vec_burn_3 = test_vec_burn + 2. * test_vec_burn del test_vec_burn, test_vec_burn_3 start_time = T.time() test_vec = basis_vec_handles[0].get() get_time = T.time() - start_time start_time = T.time() test_vec_3 = test_vec + 2. * test_vec add_scale_time = T.time() - start_time del test_vec, test_vec_3 vecs_per_worker = self.max_vecs_per_node * _parallel.get_num_nodes() / \ _parallel.get_num_MPI_workers() num_gets = num_sums/(_parallel.get_num_MPI_workers()*(\ vecs_per_worker-2)) + \ num_bases/_parallel.get_num_MPI_workers() num_add_scales = num_sums * num_bases / _parallel.get_num_MPI_workers() self.print_msg('Linear combinations will take at least %.1f minutes' % (num_gets * get_time / 60. + num_add_scales * add_scale_time / 60.)) # convenience rank = _parallel.get_rank() # num_bases_per_proc_chunk is the num of bases each proc gets at once. num_bases_per_proc_chunk = 1 num_sums_per_proc_chunk = self.max_vecs_per_proc - \ num_bases_per_proc_chunk basis_tasks = _parallel.find_assignments(range(num_bases)) sum_tasks = _parallel.find_assignments(range(num_sums)) # Find max number of tasks among all processors max_num_basis_tasks = max([len(tasks) for tasks in basis_tasks]) max_num_sum_tasks = max([len(tasks) for tasks in sum_tasks]) # These variables are the number of iters through loops that retrieve # ("get") and "put" basis and sum vecs. num_basis_get_iters = int( N.ceil(max_num_basis_tasks * 1. / num_bases_per_proc_chunk)) num_sum_put_iters = int( N.ceil(max_num_sum_tasks * 1. / num_sums_per_proc_chunk))
if num_sum_put_iters > 1: self.print_msg( 'Warning: The basis vecs, ' 'of which there are %d, will be retrieved %d times each. ' 'If possible, increase number of nodes or ' 'max_vecs_per_node to reduce redundant retrieves and get a ' 'big speedup.' % (num_bases, num_sum_put_iters)) for sum_put_index in xrange(num_sum_put_iters): if len(sum_tasks[rank]) > 0: start_sum_index = min( sum_tasks[rank][0] + sum_put_index * num_sums_per_proc_chunk, sum_tasks[rank][-1] + 1) end_sum_index = min(start_sum_index + num_sums_per_proc_chunk, sum_tasks[rank][-1] + 1) # Create empty list on each processor sum_layers = [None] * (end_sum_index - start_sum_index) else: start_sum_index = 0 end_sum_index = 0 sum_layers = [] for basis_get_index in xrange(num_basis_get_iters): if len(basis_tasks[rank]) > 0: start_basis_index = min( basis_tasks[rank][0] + basis_get_index * num_bases_per_proc_chunk, basis_tasks[rank][-1] + 1) end_basis_index = min( start_basis_index + num_bases_per_proc_chunk, basis_tasks[rank][-1] + 1) basis_indices = range(start_basis_index, end_basis_index) else: basis_indices = [] # Pass the basis vecs to proc with rank -> mod(rank+1,num_procs) # Must do this for each processor, until data makes a circle basis_vecs_recv = (None, None) for pass_index in xrange(_parallel.get_num_procs()): # If on the first pass, retrieve the basis vecs, # no send/recv. # This is all that is called when in serial, # loop iterates once. if pass_index == 0: if len(basis_indices) > 0: basis_vecs = [basis_handle.get() \ for basis_handle in basis_vec_handles[ basis_indices[0]:basis_indices[-1]+1]] else: basis_vecs = [] else: # Figure out with whom to communicate source = (_parallel.get_rank()-1) % \ _parallel.get_num_procs() dest = (_parallel.get_rank()+1) % \ _parallel.get_num_procs() # Create unique tags based on ranks send_tag = _parallel.get_rank() * \ (_parallel.get_num_procs()+1) + dest recv_tag = source*(_parallel.get_num_procs()+1) + \ _parallel.get_rank() # Send/receive data basis_vecs_send = (basis_vecs, basis_indices) request = _parallel.comm.isend(basis_vecs_send, dest=dest, tag=send_tag) basis_vecs_recv = _parallel.comm.recv(source=source, tag=recv_tag) request.Wait() _parallel.barrier() basis_indices = basis_vecs_recv[1] basis_vecs = basis_vecs_recv[0] # Compute the scalar multiplications for this set of data. # basis_indices stores the indices of the coeff_mat to # use. for sum_index in xrange(start_sum_index, end_sum_index): for basis_index, basis_vec in enumerate(basis_vecs): sum_layer = basis_vec * \ coeff_mat[basis_indices[basis_index],\ sum_index] if sum_layers[sum_index - start_sum_index] is None: sum_layers[sum_index-start_sum_index] = \ sum_layer else: sum_layers[sum_index-start_sum_index] += \ sum_layer if (T.time() - self.prev_print_time) > self.print_interval: self.print_msg( 'Completed %.1f%% of linear combinations' % (sum_index * 100. / len(sum_tasks[rank]))) self.prev_print_time = T.time() # Completed this set of sum vecs; put them to memory or file for sum_index in xrange(start_sum_index, end_sum_index): sum_vec_handles[sum_index].put(sum_layers[sum_index - start_sum_index]) del sum_layers self.print_msg('Completed %.1f%% of linear combinations' % 100.) self.prev_print_time = T.time() _parallel.barrier()
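# A runnable sketch of what lin_combine assembles, assuming only numpy: each
# sum vec j is sum_k basis_vec_k * coeff_mat[k, j], accumulated layer by layer
# as in the loop above, which is just sums = basis * coeff_mat. Toy arrays
# stand in for the vector handles.
import numpy as np
basis = np.random.randn(5, 3)        # 3 basis vecs of dimension 5, as columns
coeff_mat = np.random.randn(3, 2)    # coefficients for 2 sum vecs
sums = np.zeros((5, 2))
for j in range(2):                   # layer-by-layer accumulation
    for k in range(3):
        sums[:, j] += basis[:, k] * coeff_mat[k, j]
assert np.allclose(sums, basis.dot(coeff_mat))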
def scandb_snps(snps,pval): snps = make_list(snps); params = urllib.urlencode({ 'list' : ",".join(snps), 'snpinfo' : 1, 'expr' : 1, 'pval' : pval, 'output' : 'tab' }); headers = { "Content-type" : "application/x-www-form-urlencoded", "Accept": "text/plain" }; # Submit query to ScanDB. try: conn = httplib.HTTPConnection(SCANDB_HOST,timeout=CON_TIMEOUT); conn.request("POST","/newinterface/snpinfo.php",params,headers); response = conn.getresponse(); data = response.read(); conn.close(); except: print >> sys.stderr, "Error: query to ScanDB failed. The message was:"; print >> sys.stderr, str(sys.exc_info()[1]); print >> sys.stderr, "ScanDB itself may be down, or your internet connection may have failed."; conn.close(); return []; # exit # If the response status wasn't OK, there's something wrong. Bail out. if response.status != 200: print >> sys.stderr, "Error: query to ScanDB failed. The response was:"; print >> sys.stderr, "%s %s" % (str(response.status),str(response.reason)); return []; # If the query itself failed for some reason, bail out. elif data is None: return []; # exit # Checks passed - let's parse our data. parsed = eQTLSet(); for line in data.split("\n")[1:]: if line == "": continue; e = line.split("\t"); snp = e[0]; for q in e[6].split(":"): qs = q.split(); if qs[0] != "NA": eqtl = eQTL(snp,qs[0]); eqtl.population = qs[1]; eqtl.source = "SCAN"; eqtl.organism = "Homo sapiens"; eqtl.tissue = "LCL"; # Need to be careful in casting p-value. If it's not a float, it could # crash the whole program. try: eqtl.pval = float(qs[2]); except: eqtl.pval = "NA"; parsed.add(eqtl); return parsed;
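# A standalone sketch of the parsing step in scandb_snps above: per SNP,
# column 7 of the tab-delimited response holds colon-separated
# "gene population pval" triples, with "NA" meaning no eQTL. The sample line
# is fabricated to illustrate the format; only the parsing logic is real.
line = "rs123\tx\tx\tx\tx\tx\tGENE1 CEU 1e-5:NA:GENE2 YRI 0.02";
e = line.split("\t");
for q in e[6].split(":"):
    qs = q.split();
    if qs[0] != "NA":
        # Guard the p-value cast so a malformed value cannot crash the program.
        try:
            pval = float(qs[2]);
        except:
            pval = "NA";
        print("%s %s %s %s" % (e[0], qs[0], qs[1], pval));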
def __init__(self, tree): self.predicate = self.build(tree.predicate) self.init_statement = [self.build(i) for i in make_list(tree.init)] self.update_statement = [self.build(i) for i in make_list(tree.update)] self.content = self.build(tree.body)
def compute_decomp(self, vec_handles, adv_vec_handles=None): """Computes decomposition and returns eigen decomposition matrices. Args: ``vec_handles``: List of handles for the vectors. Kwargs: ``adv_vec_handles``: List of handles of ``vecs`` advanced in time. If not provided, it is assumed that the vectors are a sequential time-series. Thus ``vec_handles`` becomes ``vec_handles[:-1]`` and ``adv_vec_handles`` becomes ``vec_handles[1:]``. Returns: ``ritz_vals``: 1D array of Ritz values. ``mode_norms``: 1D array of mode norms. ``build_coeffs``: Matrix of build coefficients for modes. """ if vec_handles is not None: self.vec_handles = util.make_list(vec_handles) if self.vec_handles is None: raise util.UndefinedError('vec_handles is not given') if adv_vec_handles is not None: self.adv_vec_handles = util.make_list(adv_vec_handles) if len(self.vec_handles) != len(self.adv_vec_handles): raise ValueError(('Number of vec_handles and adv_vec_handles' ' is not equal.')) # For a sequential dataset, compute correlation mat for all vectors. # This is more efficient because only one call is made to the inner # product routine, even though we don't need the last row/column yet. # Later we need all but the last element of the last column, so it is # faster to compute all of this now. Only one extra element is # computed, since this is a symmetric inner product matrix. Then # slice the expanded correlation matrix accordingly. if adv_vec_handles is None: self.expanded_correlation_mat =\ self.vec_space.compute_symmetric_inner_product_mat( self.vec_handles) self.correlation_mat = self.expanded_correlation_mat[:-1, :-1] # For non-sequential data, compute the correlation matrix from the # unadvanced snapshots only. else: self.correlation_mat = \ self.vec_space.compute_symmetric_inner_product_mat( self.vec_handles) # Compute eigendecomposition of correlation matrix self.correlation_mat_evals, self.correlation_mat_evecs = \ _parallel.call_and_bcast(util.eigh, self.correlation_mat, is_positive_definite=True) correlation_mat_evals_sqrt = N.mat(N.diag( self.correlation_mat_evals**-0.5)) # Compute low-order linear map for sequential snapshot set. This takes # advantage of the fact that for a sequential dataset, the unadvanced # and advanced vectors overlap. if self.adv_vec_handles is None: self.low_order_linear_map = correlation_mat_evals_sqrt *\ self.correlation_mat_evecs.H *\ self.expanded_correlation_mat[:-1, 1:] *\ self.correlation_mat_evecs * correlation_mat_evals_sqrt # Compute low-order linear map for non-sequential snapshot set else: self.low_order_linear_map = correlation_mat_evals_sqrt *\ self.correlation_mat_evecs.H *\ self.vec_space.compute_inner_product_mat(self.vec_handles, self.adv_vec_handles) * self.correlation_mat_evecs *\ correlation_mat_evals_sqrt # Compute eigendecomposition of low-order linear map, finish DMD # computation. self._compute_eigen_decomp() if (self.mode_norms < 0).any() and self.verbosity > 0 and \ _parallel.is_rank_zero(): print >> output_channel, ('Warning: mode norms have negative ' 'values. This often happens ' 'when the rank of the vector matrix is much less than the ' 'number of columns. Try using fewer vectors (fewer columns).') return self.ritz_vals, self.mode_norms, self.build_coeffs
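# A runnable sketch of the sequential-dataset trick above, assuming only
# numpy: the expanded correlation matrix is computed once; its [:-1, :-1]
# block is the correlation matrix of the unadvanced snapshots, and its
# [:-1, 1:] block pairs each snapshot with its advanced successor. Toy
# snapshot columns stand in for vector handles.
import numpy as np
snaps = np.random.randn(5, 7)        # 7 sequential snapshots as columns
expanded = snaps.T.dot(snaps)        # all pairwise inner products, 7 x 7
corr = expanded[:-1, :-1]            # <v_i, v_j> over the unadvanced set
cross = expanded[:-1, 1:]            # <v_i, v_{j+1}>, used in the low-order map
assert np.allclose(corr, snaps[:, :-1].T.dot(snaps[:, :-1]))
assert np.allclose(cross, snaps[:, :-1].T.dot(snaps[:, 1:]))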