def reset_weights(self):
    logger.info("resetting layer weights")
    # default layer1_size = 100 in gensim, Seger changed to 50
    # REAL is float32
    self.syn0 = empty((len(self.vocab), self.layer1_size), dtype=REAL)
    # randomize weights vector by vector, rather than materializing a huge
    # random matrix in RAM at once
    for i in xrange(len(self.vocab)):
        random.seed(uint32(self.hashfxn(self.index2word[i] + str(self.seed))))
        if self.pre_train:
            word = self.index2word[i]
            if word in self.uni_emb and not np_isnan(self.uni_emb[word]).any():
                self.syn0[i] = np_copy(self.uni_emb.syn1neg[self.uni_emb.vocab[word].index])
                # self.syn0[i] = np_copy(self.uni_emb[word])
                # print '##word', word, 'vec=', self.syn0[i]
            elif word in self.bi_emb and not np_isnan(self.bi_emb[word]).any():
                self.syn0[i] = np_copy(self.bi_emb.syn1neg[self.bi_emb.vocab[word].index])
                # self.syn0[i] = np_copy(self.bi_emb[word])
            else:
                self.syn0[i] = (random.rand(self.layer1_size) - 0.5) / self.layer1_size
        else:
            self.syn0[i] = (random.rand(self.layer1_size) - 0.5) / self.layer1_size
    self.syn1neg = zeros((len(self.vocab), self.pred_size), dtype=REAL)
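# Why seed per word above: each row's random initialization depends only on
# (word, seed), so the init is reproducible and independent of vocabulary
# order. A minimal sketch of the same pattern; hash() stands in for
# self.hashfxn here and is only stable across runs if PYTHONHASHSEED is
# fixed (assumption).
import numpy as np

def init_row(word, seed, size=50):
    rng = np.random.RandomState(np.uint32(hash(word + str(seed)) & 0xFFFFFFFF))
    return (rng.rand(size) - 0.5) / size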
def bernoulli_N_iteration(x, N=0):
    """N iterations of the Bernoulli map."""
    if N == 0:
        N = mnp.param['iter']['scale'].get()
    # iterate on a copy so the caller's array is never mutated; returning xn
    # (rather than the loop variable) stays well-defined even if N is 0
    xn = np_copy(x)
    for _ in range(N):
        xn = np_copy(bernoulli_map(xn))
    return xn
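# bernoulli_map is not defined in this snippet; a common choice is the
# Bernoulli shift (doubling map) x -> 2x mod 1. A minimal sketch under that
# assumption:
import numpy as np

def bernoulli_map(x):
    return np.mod(2.0 * x, 1.0)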
def rot_Fi(self):
    """rot_Fi: inverse rotation of the front face (Front)."""
    temp = np_copy(self.cubes['FRD'])
    self.cubes['FRD'][0] = self.cubes['LFD'][1]
    self.cubes['FRD'][1] = self.cubes['LFD'][2]
    self.cubes['FRD'][2] = self.cubes['LFD'][0]
    self.cubes['LFD'][0] = self.cubes['LFU'][2]
    self.cubes['LFD'][1] = self.cubes['LFU'][1]
    self.cubes['LFD'][2] = self.cubes['LFU'][0]
    self.cubes['LFU'][0] = self.cubes['FRU'][2]
    self.cubes['LFU'][1] = self.cubes['FRU'][0]
    self.cubes['LFU'][2] = self.cubes['FRU'][1]
    self.cubes['FRU'][0] = temp[0]
    self.cubes['FRU'][1] = temp[2]
    self.cubes['FRU'][2] = temp[1]

    self.cubes['FL'], self.cubes['FD'], self.cubes['FR'], self.cubes['FU'] \
        = self.cubes['FU'], self.cubes['FL'], self.cubes['FD'], self.cubes['FR']
def rot_Bi(self):
    """rot_Bi: inverse rotation of the back face (Back)."""
    temp = np_copy(self.cubes['RBD'])
    self.cubes['RBD'][0] = self.cubes['RBU'][2]
    self.cubes['RBD'][1] = self.cubes['RBU'][1]
    self.cubes['RBD'][2] = self.cubes['RBU'][0]
    self.cubes['RBU'][0] = self.cubes['BLU'][2]
    self.cubes['RBU'][1] = self.cubes['BLU'][0]
    self.cubes['RBU'][2] = self.cubes['BLU'][1]
    self.cubes['BLU'][0] = self.cubes['BLD'][0]
    self.cubes['BLU'][1] = self.cubes['BLD'][2]
    self.cubes['BLU'][2] = self.cubes['BLD'][1]
    self.cubes['BLD'][0] = temp[1]
    self.cubes['BLD'][1] = temp[2]
    self.cubes['BLD'][2] = temp[0]

    self.cubes['BR'], self.cubes['BD'], self.cubes['BL'], self.cubes['BU'] \
        = self.cubes['BU'], self.cubes['BR'], self.cubes['BD'], self.cubes['BL']
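# A minimal sanity-check sketch (check_inverse is a hypothetical helper, not
# part of the class): a quarter-turn followed by its inverse must restore the
# cube, which gives a cheap test that rot_F and rot_Fi really are inverses.
def check_inverse(cube):
    before = {k: np_copy(v) for k, v in cube.cubes.items()}
    cube.rot_F()   # one forward quarter-turn of the front face...
    cube.rot_Fi()  # ...undone by the inverse quarter-turn
    return all((cube.cubes[k] == before[k]).all() for k in before)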
def __percentile_constraint_exact(A, x, y, constr, had_slack=False):
    """
    Form exact version of DVH constraint.

    Arguments:
        A: Structure-specific dose matrix to use in constraint.
        x (:class:`cvxpy.Variable`): Beam intensity variable.
        y: Vector of doses, feasible with respect to constraint ``constr``.
        constr (:class:`PercentileConstraint`): Dose constraint.
        had_slack (:obj:`bool`, optional): If ``True``, include slack
            variable in constraint formulation.

    Returns:
        :class:`cvxpy.Constraint`: :mod:`cvxpy` representation of exact
        dose constraint.

    Raises:
        TypeError: If ``constr`` not of type :class:`PercentileConstraint`.
    """
    if not isinstance(constr, PercentileConstraint):
        raise TypeError('parameter constr must be of type {}. '
                        'Provided: {}'.format(PercentileConstraint, type(constr)))

    sign = 1 if constr.upper else -1
    dose = constr.dose_achieved if had_slack else constr.dose
    idx_exact = constr.get_maxmargin_fulfillers(y, had_slack)
    A_exact = np_copy(A[idx_exact, :])
    return sign * (A_exact * x - dose.value) <= 0
def insert(self, index: int, to_add: Any) -> 'NoteSequence':
    validate_type('index', index, int)

    new_notes = to_add.note_attr_vals
    if len(new_notes.shape) == 1:
        new_notes_num_attributes = new_notes.shape[0]
    else:
        new_notes_num_attributes = new_notes.shape[1]
    if len(self.note_attr_vals):
        num_attributes = self.note_attr_vals.shape[1]
    else:
        num_attributes = 0
    if num_attributes and num_attributes != new_notes_num_attributes:
        raise NoteSequenceInvalidAppendException(
            'NoteSequence inserted into a NoteSequence must have the same number of attributes')

    if len(self.note_attr_vals):
        self.note_attr_vals = np_insert(self.note_attr_vals, index, new_notes, axis=0)
    else:
        # Must copy the list of the underlying note array to initialize storage for a
        # NoteSequence, because NoteSequence arrays are 2D
        if len(new_notes.shape) == 1:
            new_notes = [new_notes]
        self.note_attr_vals = np_copy(new_notes)

    self.update_range_map()
    return self
def _set_cluster(self, cluster=None):
    if cluster is None:
        self.cluster = get_cluster(self.cv_image, self.no_of_colors)
        return np_copy(self.cluster.labels_)
    else:
        self.cluster = cluster
        return apply_cluster(self.cv_image, self.cluster)
def copy(source: 'NoteSequence') -> 'NoteSequence':
    validate_type('source', source, NoteSequence)
    copy = NoteSequence(num_notes=len(source),
                        child_sequences=source.child_sequences,
                        mn=source.mn)
    # Copy the underlying np array from source note sequence to target
    copy.note_attr_vals = np_copy(source.note_attr_vals)
    return copy
def passed_test(dtype, as_matrix, x_is_row, y_is_row, provide_y, stride):
    """
    Run one vector copy test.

    Arguments:
        dtype:      either 'float64' or 'float32', the NumPy dtype to test
        as_matrix:  True to test a NumPy matrix, False to test a NumPy ndarray
        x_is_row:   True to test a row vector as parameter x, False to test a column vector
        y_is_row:   True to test a row vector as parameter y, False to test a column vector
        provide_y:  True if y is to be provided to the BLASpy function, False otherwise
        stride:     stride of x and y to test; if None, a random stride is assigned

    Returns:
        True if the expected result is within the margin of error of the actual result,
        False otherwise.
    """
    # generate random sizes for vector dimensions and vector stride (if necessary)
    length = randint(N_MIN, N_MAX)
    stride = randint(N_MIN, STRIDE_MAX) if stride is None else stride

    # create random vectors to test
    x = random_vector(length, x_is_row, dtype, as_matrix)
    y = random_vector(length, y_is_row, dtype, as_matrix) if provide_y else None

    # create view of x that can be used to calculate the expected result
    x_2 = x.T if x_is_row else x

    # compute the expected result
    if stride == 1:
        y_2 = x_2
    else:
        # y is provided
        if provide_y:
            y_2 = np_copy(y.T) if y_is_row else np_copy(y)
        for i in range(0, length, stride):
            y_2[i, 0] = x_2[i, 0]

    # get the actual result
    y = copy(x, y, stride, stride)

    # if y is a row vector, make y_2 a row vector as well
    if y.shape[0] == 1:
        y_2 = y_2.T

    # compare the actual result to the expected result and return result of the test
    return allclose(y, y_2)
def array_pow(a, k):
    """
    Calculate the matrix power a^k.

    Args:
        a (numpy.array): a square matrix to be multiplied.
        k (int): the power index.
    """
    retval = np_copy(a)
    for i in xrange(1, k):
        retval = np_dot(retval, a)
    return retval
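# array_pow above performs k - 1 full matrix products; for large k,
# exponentiation by squaring needs only O(log k) products. A minimal sketch
# (assumes k >= 1); numpy.linalg.matrix_power implements the same idea and
# additionally handles k = 0 and negative powers.
import numpy as np

def array_pow_fast(a, k):
    result = None
    base = np.copy(a)
    while k > 0:
        if k & 1:  # this bit of k is set: fold the current power of a in
            result = base if result is None else result @ base
        base = base @ base  # square to get the next power-of-two exponent
        k >>= 1
    return result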
def insert_paa(self, ts_paa):
    """
    Function that inserts a new sequence in PAA format

    :param ts_paa: the new PAA sequence
    """
    self.sequences.append(ts_paa)
    # update the counters
    self.nb_sequences += 1
    # calculate mean and std incrementally
    if self.nb_sequences == 1:
        self.sum = np_copy(ts_paa)
        self.mean = np_copy(ts_paa)
        self.std = np_zeros(self.tree.size_word)
        self.sn = np_zeros(self.tree.size_word)
    else:
        mean_moins_1 = np_copy(self.mean)
        self.sum += ts_paa
        self.mean = self.sum / self.nb_sequences
        self.sn += (ts_paa - mean_moins_1) * (ts_paa - self.mean)
        self.std = np_sqrt(self.sn / self.nb_sequences)
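# The incremental update above is Welford's online algorithm: sn accumulates
# the sum of squared deviations, so std is the population standard deviation.
# A standalone check against NumPy (hypothetical data):
import numpy as np

data = np.random.rand(100, 8)  # e.g. 100 PAA words of length 8
n, s, mean, sn = 0, np.zeros(8), np.zeros(8), np.zeros(8)
for row in data:
    n += 1
    if n == 1:
        s, mean = row.copy(), row.copy()
    else:
        prev_mean = mean.copy()
        s += row
        mean = s / n
        sn += (row - prev_mean) * (row - mean)
assert np.allclose(np.sqrt(sn / n), data.std(axis=0))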
def rot_Ri(self):
    """rot_Ri: inverse rotation of the right face (Right)."""
    temp = np_copy(self.cubes['FRD'])
    self.cubes['FRD'][0] = self.cubes['FRU'][2]
    self.cubes['FRD'][1] = self.cubes['FRU'][1]
    self.cubes['FRD'][2] = self.cubes['FRU'][0]
    self.cubes['FRU'][0] = self.cubes['RBU'][2]
    self.cubes['FRU'][1] = self.cubes['RBU'][0]
    self.cubes['FRU'][2] = self.cubes['RBU'][1]
    self.cubes['RBU'][0] = self.cubes['RBD'][0]
    self.cubes['RBU'][1] = self.cubes['RBD'][2]
    self.cubes['RBU'][2] = self.cubes['RBD'][1]
    self.cubes['RBD'][0] = temp[1]
    self.cubes['RBD'][1] = temp[2]
    self.cubes['RBD'][2] = temp[0]

    temp = np_copy(self.cubes['RD'])
    self.cubes['RD'][0] = self.cubes['FR'][1]
    self.cubes['RD'][1] = self.cubes['FR'][0]
    self.cubes['FR'][0] = self.cubes['RU'][1]
    self.cubes['FR'][1] = self.cubes['RU'][0]
    self.cubes['RU'][0] = self.cubes['BR'][1]
    self.cubes['RU'][1] = self.cubes['BR'][0]
    self.cubes['BR'][0] = temp[1]
    self.cubes['BR'][1] = temp[0]
def rot_Li(self):
    """rot_Li: inverse rotation of the left face (Left)."""
    temp = np_copy(self.cubes['BLU'])
    self.cubes['BLU'][0] = self.cubes['LFU'][2]
    self.cubes['BLU'][1] = self.cubes['LFU'][0]
    self.cubes['BLU'][2] = self.cubes['LFU'][1]
    self.cubes['LFU'][0] = self.cubes['LFD'][0]
    self.cubes['LFU'][1] = self.cubes['LFD'][2]
    self.cubes['LFU'][2] = self.cubes['LFD'][1]
    self.cubes['LFD'][0] = self.cubes['BLD'][1]
    self.cubes['LFD'][1] = self.cubes['BLD'][2]
    self.cubes['LFD'][2] = self.cubes['BLD'][0]
    self.cubes['BLD'][0] = temp[2]
    self.cubes['BLD'][1] = temp[1]
    self.cubes['BLD'][2] = temp[0]

    temp = np_copy(self.cubes['LD'])
    self.cubes['LD'][0] = self.cubes['BL'][1]
    self.cubes['LD'][1] = self.cubes['BL'][0]
    self.cubes['BL'][0] = self.cubes['LU'][1]
    self.cubes['BL'][1] = self.cubes['LU'][0]
    self.cubes['LU'][0] = self.cubes['FL'][1]
    self.cubes['LU'][1] = self.cubes['FL'][0]
    self.cubes['FL'][0] = temp[1]
    self.cubes['FL'][1] = temp[0]
def extend(self, note_sequence: 'NoteSequence') -> 'NoteSequence':
    validate_type('note_sequence', note_sequence, NoteSequence)

    if len(self.note_attr_vals) and self.note_attr_vals[0].shape != note_sequence.note_attr_vals[0].shape:
        raise NoteSequenceInvalidAppendException(
            'NoteSequence extended to a NoteSequence must have the same number of attributes')

    # Either this is the first note in the sequence, or it's not. If it is, make the
    # argument's notes the note_attr_vals of this sequence. If it is not, append these notes
    # to the existing sequence -- we have already confirmed the shapes conform if the
    # existing sequence is not empty.
    if len(self.note_attr_vals):
        self.note_attr_vals = np_concatenate((self.note_attr_vals, note_sequence.note_attr_vals))
    else:
        self.note_attr_vals = np_copy(note_sequence.note_attr_vals)

    self.update_range_map()
    return self
def __init__(self, tree, parent, sax, cardinality, sequences):
    """
    Initialization function of the InternalNode class

    :returns: an internal node
    :rtype: InternalNode
    """
    # inherit the init function of the RootNode class
    RootNode.__init__(self, tree=tree, parent=parent, sax=sax,
                      cardinality=cardinality)

    # transform the list of sequences to PAA
    list_ts_paa = self.tree.isax.transform_paa(sequences)
    tmp_mean = np_mean(list_ts_paa, axis=0)
    tmp_stdev = np_std(list_ts_paa, axis=0)

    # As this is an internal node, it necessarily has at least one descendant
    # node, so we compute the candidate cardinalities for the next level.
    cardinality_next_tmp = np_copy(self.cardinality)
    # if max_card
    if self.tree.boolean_card_max:
        # multiply by 2 only the cardinalities not exceeding the authorized threshold
        cardinality_next_tmp[cardinality_next_tmp <= self.tree.max_card_alphabet] *= 2
    else:
        # multiply by 2 all the cardinalities (they are all candidates)
        cardinality_next_tmp *= 2
    # the self.split function chooses the cardinality index to multiply by 2
    position_min = self.split(cardinality_next_tmp, tmp_mean, tmp_stdev)

    # write the next cardinality (for its leaf nodes)
    self.cardinality_next = np_copy(self.cardinality)
    self.cardinality_next[position_min] *= 2
    if self.tree.bigger_current_cardinality < self.cardinality_next[position_min]:
        self.tree.bigger_current_cardinality = self.cardinality_next[position_min]

    self.level = parent.level + 1
def copy(source: 'Measure') -> 'Measure':
    new_measure = Measure(meter=source.meter,
                          swing=source.swing,
                          num_notes=source.num_notes,
                          mn=MakeNoteConfig.copy(source.mn),
                          performance_attrs=source.performance_attrs)

    # Copy the underlying np array from source after constructing a Measure (and parent class
    # NoteSequence) from the source. This is because both of those __init__()s construct new
    # storage and notes from the measure's MakeNoteConfig. If that has attr_vals_default_map
    # set it will use that to construct the notes. But we want copy ctor semantics, not ctor
    # semantics. So we have to repeat the same logic as is found in NoteSequence.copy() and
    # copy the underlying note storage from source to target.
    new_measure.note_attr_vals = np_copy(source.note_attr_vals)

    new_measure.beat = source.beat
    new_measure.next_note_start = source.next_note_start
    return new_measure
def __init__(self, tree, parent, sax, cardinality):
    """
    Initialization function of the RootNode class

    :returns: a root node
    :rtype: RootNode
    """
    self.iSAX_word = np_array([sax, cardinality]).T
    Node.__init__(self, parent=parent, name=str(self.iSAX_word))

    self.tree = tree
    self.sax = sax
    self.cardinality = cardinality
    self.cardinality_next = np_copy(self.cardinality)
    self.cardinality_next = np_array([x * 2 for x in self.cardinality_next])
    # Number of sequences contained in the node (or by its sons)
    self.nb_sequences = 0

    # The incremental computing part for CFOF
    self.mean = np_empty(shape=self.tree.size_word)
    # Allows the incremental calculation of self.mean
    self.sum = np_empty(shape=self.tree.size_word)
    self.std = np_empty(shape=self.tree.size_word)
    # Allows the incremental calculation of self.std
    self.sn = np_empty(shape=self.tree.size_word)

    # Specific to internal nodes
    self.nodes = []
    self.key_nodes = {}

    self.terminal = False
    self.level = 0

    self.id = RootNode.id_global
    RootNode.id_global += 1
def rot_UR(self):
    """rot_UR: rotation of the whole cube in the Up --> Right direction."""
    self.rot_F()
    self.rot_Bi()

    temp = np_copy(self.cubes['RU'])
    self.cubes['RU'][0] = self.cubes['LU'][1]
    self.cubes['RU'][1] = self.cubes['LU'][0]
    self.cubes['LU'][0] = self.cubes['LD'][1]
    self.cubes['LU'][1] = self.cubes['LD'][0]
    self.cubes['LD'][0] = self.cubes['RD'][1]
    self.cubes['LD'][1] = self.cubes['RD'][0]
    self.cubes['RD'][0] = temp[1]
    self.cubes['RD'][1] = temp[0]
def rot_UF(self):
    """rot_UF: rotation of the whole cube in the Up --> Front direction."""
    self.rot_L()
    self.rot_Ri()

    temp = np_copy(self.cubes['FD'])
    self.cubes['FD'][0] = self.cubes['FU'][1]
    self.cubes['FD'][1] = self.cubes['FU'][0]
    self.cubes['FU'][0] = self.cubes['BU'][1]
    self.cubes['FU'][1] = self.cubes['BU'][0]
    self.cubes['BU'][0] = self.cubes['BD'][1]
    self.cubes['BU'][1] = self.cubes['BD'][0]
    self.cubes['BD'][0] = temp[1]
    self.cubes['BD'][1] = temp[0]
def rot_FR(self):
    """rot_FR: rotation of the whole cube in the Front --> Right direction."""
    self.rot_D()
    self.rot_Ui()

    temp = np_copy(self.cubes['FR'])
    self.cubes['FR'][0] = self.cubes['FL'][1]
    self.cubes['FR'][1] = self.cubes['FL'][0]
    self.cubes['FL'][0] = self.cubes['BL'][1]
    self.cubes['FL'][1] = self.cubes['BL'][0]
    self.cubes['BL'][0] = self.cubes['BR'][1]
    self.cubes['BL'][1] = self.cubes['BR'][0]
    self.cubes['BR'][0] = temp[1]
    self.cubes['BR'][1] = temp[0]
def _group_linkage_intersection(self):
    r"""Return the group linkage intersection of the tokens in src and tar.

    This is based on group linkage, as defined by :cite:`On:2007`.

    Most of this method is concerned with solving the assignment problem, in
    order to find the weight of the maximum weight bipartite matching. If the
    system has SciPy installed, we use its linear_sum_assignment function to
    get the assignments. Otherwise, we use the Hungarian algorithm of Munkres
    :cite:`Munkres:1957`, implemented in Python & NumPy.

    .. versionadded:: 0.4.0
    """
    intersection = self._crisp_intersection()
    src_only = sorted(self._src_tokens - self._tar_tokens)
    tar_only = sorted(self._tar_tokens - self._src_tokens)

    if linear_sum_assignment and not (
        'internal_assignment_problem' in self.params
        and self.params['internal_assignment_problem']
    ):
        arr = np_zeros((len(tar_only), len(src_only)))

        for col in range(len(src_only)):
            for row in range(len(tar_only)):
                arr[row, col] = self.params['metric'].dist(
                    src_only[col], tar_only[row]
                )

        for row, col in zip(*linear_sum_assignment(arr)):
            sim = 1.0 - arr[row, col]
            if sim >= self.params['threshold']:
                intersection[src_only[col]] += (sim / 2) * (
                    self._src_tokens - self._tar_tokens
                )[src_only[col]]
                intersection[tar_only[row]] += (sim / 2) * (
                    self._tar_tokens - self._src_tokens
                )[tar_only[row]]
    else:
        n = max(len(tar_only), len(src_only))
        arr = np_zeros((n, n), dtype=float)

        for col in range(len(src_only)):
            for row in range(len(tar_only)):
                arr[row, col] = self.params['metric'].dist(
                    src_only[col], tar_only[row]
                )

        src_only += [''] * (n - len(src_only))
        tar_only += [''] * (n - len(tar_only))

        orig_sim = 1 - np_copy(arr)

        # Step 1: subtract each row's minimum from the row
        for row in range(n):
            arr[row, :] -= arr[row, :].min()
        # Step 2: subtract each column's minimum from the column
        for col in range(n):
            arr[:, col] -= arr[:, col].min()

        while True:
            # Step 3: assign rows/columns whose only zero is unallocated
            assignments = {}

            allocated_cols = set()
            allocated_rows = set()
            assigned_rows = set()
            assigned_cols = set()

            for row in range(n):
                if (arr[row, :] == 0.0).sum() == 1:
                    col = arr[row, :].argmin()
                    if col not in allocated_cols:
                        assignments[row, col] = orig_sim[row, col]
                        allocated_cols.add(col)
                        assigned_rows.add(row)
                        assigned_cols.add(col)

            for col in range(n):
                if (arr[:, col] == 0.0).sum() == 1:
                    row = arr[:, col].argmin()
                    if row not in allocated_rows:
                        assignments[row, col] = orig_sim[row, col]
                        allocated_rows.add(row)
                        assigned_rows.add(row)
                        assigned_cols.add(col)

            if len(assignments) == n:
                break

            marked_rows = {_ for _ in range(n) if _ not in assigned_rows}
            marked_cols = set()
            for row in sorted(set(marked_rows)):
                for col, mark in enumerate(arr[row, :] == 0.0):
                    if mark:
                        marked_cols.add(col)
                        for row2 in range(n):
                            if (row2, col) in assignments:
                                marked_rows.add(row2)

            if n - len(marked_rows) + len(marked_cols) == n:
                # We have sufficient lines
                for col in range(n):
                    row = arr[:, col].argmin()
                    assignments[row, col] = orig_sim[row, col]
                break

            # Step 4: shift the minimum uncovered value
            min_val = arr[tuple(marked_rows), :][
                :, sorted(set(range(n)) - marked_cols)
            ].min()
            for row in range(n):
                for col in range(n):
                    if row in marked_rows and col not in marked_cols:
                        arr[row, col] -= min_val
                    elif row not in marked_rows and col in marked_cols:
                        arr[row, col] += min_val

        for row, col in assignments.keys():
            sim = orig_sim[row, col]
            if sim >= self.params['threshold']:
                intersection[src_only[col]] += (sim / 2) * (
                    self._src_tokens - self._tar_tokens
                )[src_only[col]]
                intersection[tar_only[row]] += (sim / 2) * (
                    self._tar_tokens - self._src_tokens
                )[tar_only[row]]

    return intersection
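# For reference, the SciPy fast path above reduces to this pattern: build a
# cost (distance) matrix, then let linear_sum_assignment pick the row/column
# pairing with minimum total cost (toy 2x2 matrix, hypothetical values).
import numpy as np
from scipy.optimize import linear_sum_assignment

cost = np.array([[0.2, 0.9],
                 [0.8, 0.1]])  # cost[row, col] = dist(tar_only[row], src_only[col])
rows, cols = linear_sum_assignment(cost)
print(rows.tolist(), cols.tolist())  # [0, 1] [0, 1]: total cost 0.2 + 0.1 = 0.3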
def insert_paa(self, new_paa):
    """
    The insert_paa(new_paa) function to insert a new sequence converted to PAA

    :param new_paa: the converted PAA sequence to insert
    """
    i_sax_word = self.tree.isax.transform_paa_to_isax(new_paa, self.cardinality_next)[0]

    # for i_sax_word, keep the first element of each tuple and test whether the word
    # already appears among the child nodes
    if str([i[0] for i in i_sax_word]) in self.key_nodes:
        # We recover the node that matches the word
        current_node = self.key_nodes[str([i[0] for i in i_sax_word])]
        # If it's a leaf
        if current_node.terminal:
            # and we do not exceed the max threshold, or the leaf node is no longer splitable
            # nb: this second condition is not suggested by Shieh and Keogh
            if current_node.nb_sequences < self.tree.threshold or not current_node.splitable:
                current_node.insert_paa(new_paa)
            # But otherwise (we exceed the max threshold and the leaf is splitable)
            else:
                # Creation of the new internal node
                new_node = InternalNode(self.tree, current_node.parent,
                                        np_copy(current_node.sax),
                                        np_copy(current_node.cardinality),
                                        current_node.sequences)
                # We insert the new sequence in this new internal node
                new_node.insert_paa(new_paa)
                # Each sequence of the current leaf is inserted into the new internal node;
                # this internal node will create one or more leaves to hold these sequences
                for ts in current_node.sequences:
                    new_node.insert_paa(ts)
                # and we delete the current leaf from the list of nodes
                self.nodes.remove(current_node)
                # which we also remove from the dict
                del self.key_nodes[str(current_node.sax)]
                # and we add the new internal node to the dict
                self.key_nodes[str(current_node.sax)] = new_node
                self.nodes.append(new_node)
                current_node.parent = None
                # and we definitively delete the current leaf
                del current_node
        # Otherwise (it's not a leaf) we continue the search down the tree
        else:
            current_node.insert_paa(new_paa)
    # Otherwise (the SAX node does not exist) we create a new leaf
    else:
        new_node = TerminalNode(self.tree, self, [i[0] for i in i_sax_word],
                                np_array(self.cardinality_next))
        new_node.insert_paa(new_paa)
        self.key_nodes[str([i[0] for i in i_sax_word])] = new_node
        self.nodes.append(new_node)
        self.tree.num_nodes += 1

    # update the node counters
    self.nb_sequences += 1
    # calculate mean and std incrementally
    if self.nb_sequences == 1:
        self.sum = np_copy(new_paa)
        self.mean = np_copy(new_paa)
        self.std = np_zeros(self.tree.size_word)
        self.sn = np_zeros(self.tree.size_word)
    else:
        mean_moins_1 = np_copy(self.mean)
        self.sum += new_paa
        self.mean = self.sum / self.nb_sequences
        self.sn += (new_paa - mean_moins_1) * (new_paa - self.mean)
        self.std = np_sqrt(self.sn / self.nb_sequences)
def loadData(self,
             timer,
             condition,                # condition as set by another function
             bids=[],                  # if set, only load contigs with these bin ids
             verbose=True,             # many to some output messages
             silent=False,             # some to no output messages
             loadCovProfiles=True,
             loadKmerPCs=True,
             loadKmerVarPC=True,
             loadRawKmers=False,
             makeColors=True,
             loadContigNames=True,
             loadContigLengths=True,
             loadContigGCs=True,
             loadBins=False,
             loadLinks=False):
    """Load pre-parsed data"""
    timer.getTimeStamp()
    if silent:
        verbose = False
    if verbose:
        print("Loading data from:", self.dbFileName)

    try:
        self.numStoits = self.getNumStoits()
        self.condition = condition
        self.indices = self.dataManager.getConditionalIndices(
            self.dbFileName, condition=condition, silent=silent)
        if verbose:
            print("    Loaded indices with condition:", condition)
        self.numContigs = len(self.indices)

        if self.numContigs == 0:
            print("    ERROR: No contigs loaded using condition:", condition)
            return

        if not silent:
            print("    Working with: %d contigs" % self.numContigs)

        if loadCovProfiles:
            if verbose:
                print("    Loading coverage profiles")
            self.covProfiles = self.dataManager.getCoverageProfiles(
                self.dbFileName, indices=self.indices)
            self.normCoverages = self.dataManager.getNormalisedCoverageProfiles(
                self.dbFileName, indices=self.indices)

            # work out average coverages
            self.averageCoverages = np_array(
                [sum(i) / self.numStoits for i in self.covProfiles])

        if loadRawKmers:
            if verbose:
                print("    Loading RAW kmer sigs")
            self.kmerSigs = self.dataManager.getKmerSigs(
                self.dbFileName, indices=self.indices)

        if loadKmerPCs:
            self.kmerPCs = self.dataManager.getKmerPCAs(
                self.dbFileName, indices=self.indices)
            if verbose:
                print("    Loading PCA kmer sigs (" +
                      str(len(self.kmerPCs[0])) + " dimensional space)")
            self.kmerNormPC1 = np_copy(self.kmerPCs[:, 0])
            self.kmerNormPC1 -= np_min(self.kmerNormPC1)
            self.kmerNormPC1 /= np_max(self.kmerNormPC1)

        if loadKmerVarPC:
            self.kmerVarPC = self.dataManager.getKmerVarPC(
                self.dbFileName, indices=self.indices)
            if verbose:
                print("    Loading PCA kmer variance (total variance: %.2f" %
                      np_sum(self.kmerVarPC) + ")")

        if loadContigNames:
            if verbose:
                print("    Loading contig names")
            self.contigNames = self.dataManager.getContigNames(
                self.dbFileName, indices=self.indices)

        if loadContigLengths:
            self.contigLengths = self.dataManager.getContigLengths(
                self.dbFileName, indices=self.indices)
            if verbose:
                print("    Loading contig lengths (Total: %d BP)" %
                      sum(self.contigLengths))

        if loadContigGCs:
            self.contigGCs = self.dataManager.getContigGCs(
                self.dbFileName, indices=self.indices)
            if verbose:
                print("    Loading contig GC ratios (Average GC: %0.3f)" %
                      np_mean(self.contigGCs))

        if makeColors:
            if verbose:
                print("    Creating color map")
            # use HSV to RGB to generate colors
            S = 1  # SAT and VAL remain fixed at 1. Reduce to make
            V = 1  # pastels if that's your preference...
            self.colorMapGC = self.createColorMapHSV()

        if loadBins:
            if verbose:
                print("    Loading bin assignments")
            self.binIds = self.dataManager.getBins(
                self.dbFileName, indices=self.indices)

            if len(bids) != 0:  # make sure we're not restricted in terms of bins
                bin_stats = self.getBinStats()
                for bid in bids:
                    try:
                        self.validBinIds[bid] = bin_stats[bid][0]
                        self.isLikelyChimeric[bid] = bin_stats[bid][1]
                    except KeyError:
                        self.validBinIds[bid] = 0
                        self.isLikelyChimeric[bid] = False
            else:
                bin_stats = self.getBinStats()
                for bid in bin_stats:
                    self.validBinIds[bid] = bin_stats[bid][0]
                    self.isLikelyChimeric[bid] = bin_stats[bid][1]

            # fix the binned indices
            self.binnedRowIndices = {}
            for i in range(len(self.indices)):
                if self.binIds[i] != 0:
                    self.binnedRowIndices[i] = True
        else:
            # we need zeros as bin indices then...
            self.binIds = np_zeros(len(self.indices))

        if loadLinks:
            self.loadLinks()

        self.stoitColNames = self.getStoitColNames()

    except:
        print("Error loading DB:", self.dbFileName, exc_info()[0])
        raise
def copy(lhs, rhs):
    # `lhs = np_copy(rhs)` would only rebind the local name and leave the
    # caller's array unchanged; copy element-wise into the existing buffer.
    lhs[...] = rhs
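# A quick demonstration that the in-place version is visible to the caller,
# whereas rebinding the parameter name was not (hypothetical arrays):
import numpy as np

a, b = np.zeros(3), np.ones(3)
copy(a, b)
print(a)  # [1. 1. 1.]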