def trimFreq(self, fmin=False, fmax=False):
    self.loc_fmin = 0
    self.loc_fmax = -1
    if fmin:
        self.loc_fmin = np_amin(np_where(self.f > fmin))
    if fmax:
        self.loc_fmax = np_amin(np_where(self.f > fmax))
    if self.loc_fmax == -1:
        self.loc_fmax = self.f.size
    self.fTrim = self.f[self.loc_fmin:self.loc_fmax + 1]
    self.ampTrim = self.amp[self.loc_fmin:self.loc_fmax + 1]
    return
def trimTime(self, tmin=False, tmax=False):
    loc_tmin = 0
    loc_tmax = -1
    if tmin:
        loc_tmin = np_amin(np_where(self.t > tmin))
    if tmax:
        loc_tmax = np_amin(np_where(self.t > tmax))
    if loc_tmax == -1:
        loc_tmax = self.t.size
    self.t[:loc_tmax - loc_tmin] = self.t[loc_tmin:loc_tmax]
    self.t.resize(loc_tmax - loc_tmin, refcheck=False)
    self.wf = self.wf[loc_tmin:loc_tmax]
    self.FFT()
    return
def check_for_win(self):
    # 8 lines exist that can be the same symbol
    # Check verticals and horizontals
    # This only works for two dimensions right now
    p1 = set(np_where(self._tiles.flat == 1)[0])
    for combo in self.WINNING_COMBINATIONS:
        if combo.issubset(p1):
            self.winning_player = 1
            self.game_over()
    p2 = set(np_where(self._tiles.flat == 2)[0])
    for combo in self.WINNING_COMBINATIONS:
        if combo.issubset(p2):
            self.winning_player = 2
            self.game_over()
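
# Illustrative sketch (not part of the original class): for a 3x3 board, the
# WINNING_COMBINATIONS checked above would be sets of flattened tile indices,
# one per row, column, and diagonal -- 8 lines in total, matching the comment.
WINNING_COMBINATIONS_3X3 = [
    {0, 1, 2}, {3, 4, 5}, {6, 7, 8},   # rows
    {0, 3, 6}, {1, 4, 7}, {2, 5, 8},   # columns
    {0, 4, 8}, {2, 4, 6},              # diagonals
]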
def normalize_board(self, player_to_move, reshape=False):
    x = self._tiles.flat
    tile_copy = np_where((x == 0) | (x == player_to_move), x, -1)
    if reshape:
        return tile_copy
    else:
        return tile_copy.ravel()
def rle_encode(self, img: np_ndarray):
    '''
    img: numpy array, 1 - mask, 0 - background
    Returns the run length encoding as a formatted string
    '''
    pixels = img.flatten()
    pixels = np_concatenate([[0], pixels, [0]])
    runs = np_where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)
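
# Minimal usage sketch for rle_encode (the `encoder` instance below is
# hypothetical; the method does not actually read `self`). It illustrates the
# "start length start length ..." output format, with 1-based starts on the
# flattened mask. Assumes the numpy aliases used above, e.g.
# `from numpy import array as np_array`.
from numpy import array as np_array

toy_mask = np_array([[0, 1, 1],
                     [0, 1, 0],
                     [1, 1, 0]])
# encoder.rle_encode(toy_mask) -> "2 2 5 1 7 2"
# i.e. runs of 1s start at flattened positions 2, 5 and 7 with lengths 2, 1 and 2.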
def process_df(df):
    grouping_columns = ["event_cd", "start_bases_cd"]
    target_column = "end_bases_cd"
    ana_df = (df.query("inn_ct<=8")
              .assign(event_cd=lambda row: np_where(row.event_cd == 2, 3, row.event_cd))
              .assign(event_cd=lambda row: np_where(row.event_cd == 16, 14, row.event_cd)))
    count_df = (ana_df.groupby(grouping_columns + [target_column])
                .size()
                .to_frame()
                .reset_index()
                .rename({0: "event_ct"}, axis=1))
    return count_df.assign(
        start_first_base=lambda row: (row.start_bases_cd & 1).astype("bool"),
        start_second_base=lambda row: (row.start_bases_cd & 2).astype("bool"),
        start_third_base=lambda row: (row.start_bases_cd & 4).astype("bool"),
        end_first_base=lambda row: (row.end_bases_cd & 1).astype("bool"),
        end_second_base=lambda row: (row.end_bases_cd & 2).astype("bool"),
        end_third_base=lambda row: (row.end_bases_cd & 4).astype("bool"),
    )
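
# Worked example of the bitmask decoding used in the assign() calls above: the
# base-state code packs runner-on-first into bit 0, runner-on-second into bit 1
# and runner-on-third into bit 2, so a code of 5 means runners on first and third.
code = 5
assert bool(code & 1) is True    # first base occupied
assert bool(code & 2) is False   # second base empty
assert bool(code & 4) is True    # third base occupied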
def process_df(df):
    batting_events_df = (df.query("event_cd>=20 or event_cd==14 or event_cd==16 or event_cd<=3")
                         .assign(event_cd=lambda row: np_where(row.event_cd == 2, 3, row.event_cd))
                         .assign(event_cd=lambda row: np_where(row.event_cd == 16, 14, row.event_cd)))
    agg_df = batting_events_df.groupby("bat_lineup_id").agg({"n": "sum"})
    batting_event_prob_df = (batting_events_df.merge(agg_df, on="bat_lineup_id")
                             .assign(z=lambda row: row.n_x / row.n_y)
                             .sort_values(["bat_lineup_id", "event_cd"])
                             .groupby(["bat_lineup_id", "event_cd"])
                             .sum()
                             .query("event_cd>3"))
    b = batting_event_prob_df.reset_index().pivot(index="bat_lineup_id",
                                                  columns="event_cd",
                                                  values="z")
    b.columns = ["base_on_balls", "single", "double", "triple", "home_run"]
    return b
def mask_to_bbox(self, mask: np_ndarray):
    # Step 1 - Find the coordinates where the mask has a value different from 0.
    # The result is two arrays, arr_x = [x1, x2, ..., xn] and arr_y = [y1, y2, ..., yn],
    # paired as (x1, y1), (x2, y2), ..., (xn, yn).
    arr_y, arr_x = np_where(mask != 0)

    # Step 2 - Find the minima and the maxima of the two arrays
    y_min = np_amin(arr_y)
    y_max = np_amax(arr_y)
    x_min = np_amin(arr_x)
    x_max = np_amax(arr_x)

    return y_min, y_max, x_min, x_max
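
# Minimal sketch of the expected behaviour (the `converter` object is
# hypothetical; mask_to_bbox only reads its `mask` argument): for the toy mask
# below the nonzero pixels span rows 1..2 and columns 0..1, so the result is
# (1, 2, 0, 1) in (y_min, y_max, x_min, x_max) order.
from numpy import array as np_array

toy_mask = np_array([[0, 0, 0],
                     [1, 1, 0],
                     [0, 1, 0]])
# converter.mask_to_bbox(toy_mask) -> (1, 2, 0, 1)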
def remove_same_rows(n_pos, X_neg, X_pos, neg_comp_list):
    # Removing negative feature rows that exactly match any row in positives
    cout = 0
    for ind in range(n_pos):
        matching_inds = np_where((X_neg == X_pos[ind]).all(axis=1))
        X_neg = np_delete(X_neg, matching_inds, axis=0)
        for index in sorted(list(matching_inds[0]), reverse=True):
            cout += 1
            del neg_comp_list[index]
    print("No. of negs removed due to same feature vector = ", cout)
    n_neg = len(X_neg)
    return X_neg, neg_comp_list, n_neg
def svds(a, k=6, tol=0):
    if a.ndim != 2:
        raise ValueError("expect a matrix")
    n, p = a.shape
    comp_right = False
    if n > p:
        comp_right = True

    x_prod = None
    if issparse(a) and comp_right:
        size = p
        multiply = LinearOperator(matvec=lambda v: a.T.dot(a.dot(v)), shape=(p, p))
    elif issparse(a):
        size = n
        multiply = LinearOperator(matvec=lambda v: a.dot(a.T.dot(v)), shape=(n, n))
    elif comp_right:
        size = p
        x_prod = np_array(a.T.dot(a))
        multiply = LinearOperator(matvec=lambda v: x_prod.dot(v), shape=(p, p))
    else:
        size = n
        x_prod = np_array(a.dot(a.T))
        multiply = LinearOperator(matvec=lambda v: x_prod.dot(v), shape=(n, n))

    if x_prod is not None and (size < 100 or k >= size / 2):
        x_prod = np_array(x_prod)
        vals, vecs = linalg.eigh(x_prod)
        vals = vals[::-1][0:k]
        vecs = vecs[:, ::-1][:, 0:k]
    else:
        vals, vecs = eigsh(multiply, k=k, tol=tol)

    def rescale(x):
        x.set_cached(True)
        scal = fp_sqrt(np_sum(x * x, axis=0))
        return x.mapply_rows(scal, fp_bop_div)

    if comp_right:
        v = fp_array(vecs)
        u = rescale(a.dot(vecs))
    else:
        u = fp_array(vecs)
        v = rescale(a.T.dot(vecs))

    s = np_where(vals > 0, np_sqrt(vals), 0)
    return u, s, v.T
def filter(self, **kwargs):
    # if empty then return self (already empty)
    if self.channel.size == 0:
        return self

    HRW_new = deepcopy(self)

    for key_filter in ['min_correlation', 'min_conf_nwp', 'min_conf_no_nwp',
                       'cloud_type', 'level']:
        if key_filter in kwargs.keys():
            # if the argument given is None or an "all" keyword, skip this filter
            if kwargs[key_filter] is None or kwargs[key_filter] in ('all', 'ALL', 'A'):
                continue

            n1 = str(HRW_new.channel.size)

            if key_filter == 'min_correlation':
                inds = np_where(HRW_new.correlation > kwargs[key_filter])
            elif key_filter == 'min_conf_nwp':
                inds = np_where(HRW_new.conf_nwp > kwargs[key_filter])
            elif key_filter == 'min_conf_no_nwp':
                inds = np_where(HRW_new.conf_no_nwp > kwargs[key_filter])
            elif key_filter == 'cloud_type':
                mask = np_in1d(HRW_new.cloud_type, kwargs[key_filter])
                inds = np_where(mask)[0]
            elif key_filter == 'level':
                if kwargs[key_filter] == 'H':
                    # high level: < 440 hPa as in the ISCCP
                    inds = np_where(HRW_new.pressure < 44000)
                elif kwargs[key_filter] == 'M':
                    # mid level: 440 hPa ... 680 hPa as in the ISCCP
                    inds = np_where(np_logical_and(44000 < HRW_new.pressure,
                                                   HRW_new.pressure < 68000))
                elif kwargs[key_filter] == 'L':
                    # low level: > 680 hPa as in the ISCCP
                    inds = np_where(68000 < HRW_new.pressure)

            HRW_new.subset(inds)

            print " filter for " + key_filter + " = ", kwargs[key_filter], \
                ' (' + n1 + '->' + str(HRW_new.channel.size) + ')'

    return HRW_new
def deConvolve(self, G_w, noise_dT=3, noise_avg=3, fMax=2.4):
    self.reGrid(noise_dT=noise_dT, noise_avg=noise_avg)
    self.tPumpDeconv = np_arange(np_amin(self.tPump), np_amax(self.tPump),
                                 self.tTHz[1] - self.tTHz[0])
    loc = np_amin(np_where(self.f >= fMax))
    for i in range(self.tPumpSkew.size):
        self.dTSkewFFT[i, :loc] = self.dTSkewFFT[i, :loc] / G_w[:loc]
        self.avgSkewFFT[i, :loc] = self.avgSkewFFT[i, :loc] / G_w[:loc]
    self.dTskew = np_irfft(self.dTSkewFFT, axis=1)
    self.avgSkewFFT = np_irfft(self.avgSkewFFT, axis=1)
    self.dTdeconv = unSkew(self.tTHz, self.tPump, self.tPumpSkew, self.dTskew)
    self.avgDeconv = unSkew(self.tTHz, self.tPump, self.tPumpSkew, self.avgSkewFFT)
    self.refDeconv = self.avgDeconv - self.dTdeconv
    self.pumpDeconv = self.avgDeconv + self.dTdeconv
    self.refFFTdeconv = np_rfft(self.refDeconv, axis=1)
    self.pumpFFTdeconv = np_rfft(self.pumpDeconv, axis=1)
    self.transDeconv = self.pumpFFTdeconv / self.refFFTdeconv
    return
def sample(bins, time, value):
    """
    Given value[i] was observed at time[i], group them into bins,
    i.e., *(bins[j], bins[j+1], ...)*. The value for bin j is the average
    of all value[k] with bins[j] <= time[k] < bins[j+1].

    __Arguments__

    bins: _np.array_
        Endpoints of the bins. For n bins it shall be of length n + 1.
    time: _np.array_
        Times at which the values are observed.
    value: _np.array_
        Values for those times.

    __Returns__

    x: _np.array_
        Right endpoints of the bins.
    y: _np.array_
        Average value in each bin.
    """
    bin_idx = np_digitize(time, bins) - 1
    value_sums = np_zeros(shape=len(bins) - 1, dtype=np_float32)
    value_cnts = np_zeros(shape=len(bins) - 1, dtype=np_float32)
    np_add.at(value_sums, bin_idx, value)
    np_add.at(value_cnts, bin_idx, 1)

    # ensure graph has no holes: copy each empty bin from the bin before it
    zeros = np_where(value_cnts == 0)[0]
    assert value_cnts[0] > 0
    for z in zeros:
        value_sums[z] = value_sums[z - 1]
        value_cnts[z] = value_cnts[z - 1]

    return bins[1:], value_sums / value_cnts
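
# Worked example for sample(): with three bins [0, 10), [10, 20) and [20, 30),
# observations at t = 1 and 5 fall in the first bin, t = 12 in the second and
# t = 25, 27 in the third, so the returned averages are [3.0, 6.0, 9.0] against
# the right bin edges [10, 20, 30]. (Assumes the np_* aliases used above, e.g.
# `from numpy import array as np_array`.)
from numpy import array as np_array

bins = np_array([0, 10, 20, 30])
time = np_array([1, 5, 12, 25, 27])
value = np_array([2, 4, 6, 8, 10])
# sample(bins, time, value) -> (array([10, 20, 30]), array([3., 6., 9.]))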
def sig_to_eps(f, sigma):
    sigma = np_conjugate(sigma[np_where(f != 0)])
    f = f[np_where(f != 0)]
    w = 2 * np_pi * f * 1e12
    return f, 1 + 1j * sigma / (w * epsilon0)
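
# A minimal call sketch for sig_to_eps. The units are an assumption: f in THz
# (hence the 1e12 factor), sigma in S/m, and epsilon0 the SI vacuum permittivity
# imported elsewhere. It returns the nonzero frequencies together with the
# complex relative permittivity eps(w) = 1 + 1j * sigma / (w * eps0), w = 2*pi*f.
from numpy import array as np_array

f_demo = np_array([0.0, 0.5, 1.0])                            # THz; the f == 0 point is dropped
sigma_demo = np_array([0.0 + 0.0j, 1e3 + 2e2j, 2e3 + 4e2j])   # hypothetical conductivities
# f_nonzero, eps = sig_to_eps(f_demo, sigma_demo)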
def generate_intermediate_result(division):
    students = Student.objects.filter(division=division).exclude(vacant=True)
    results = []

    # Temporary
    student = Student.objects.first()
    students = [student]
    #####

    for index, student in enumerate(students):
        ass = Assessment.objects.filter(student=student).values(
            "exam__name", "exam__subject__name", "marks", "note")
        df = pd.DataFrame(ass)
        df.columns = ["exam", "subject", "marks", "note"]
        df["note"] = df["note"].replace({"ABSENT": "AB", "BLANK": "BLK"})
        df["marks"] = np_where(df["marks"] == -1, df["note"], df["marks"])
        df = df.drop(["note"], axis=1)

        theory = df[df["exam"] == "final_theory"].drop("exam", axis=1)
        oral = df[df["exam"] == "final_oral"].drop("exam", axis=1)
        final = theory.merge(oral, on="subject", suffixes=["_theory", "_oral"])
        final["marks"] = final["marks_theory"].map(str) + " + " + final["marks_oral"].map(str)
        final = final.drop(["marks_theory", "marks_oral"], axis=1)
        final["exam"] = "final"

        # Remove final_theory, final_oral
        df = df.drop(df[df["exam"].str.startswith("final")].index)
        ready_for_tabulation = pd.concat([df, final])

        data = []
        for s in ready_for_tabulation["subject"].unique():
            row = [s]
            sub_data = ready_for_tabulation[ready_for_tabulation["subject"] == s]
            for e in ["unit_one", "terminal", "unit_two", "final"]:
                row.append(sub_data[sub_data["exam"] == e].marks.values[0])
            data.append(row)

        df = pd.DataFrame(
            data,
            columns=["Subject", "Unit 1", "Terminal", "Unit 2", "Final"]).set_index("Subject")
        df["Total"] = df.apply(get_sub_total, axis=1)
        df["%"] = ceil(df["Total"] / 2)
        df["%"] = df["%"].round(2)

        ld = student.identifier == "LD"
        sports = student.sports > 0
        if sports:
            sports_marks = student.sportsdata_set.aggregate(
                Max("extra_marks")).get("extra_marks__max")
        else:
            sports_marks = 0

        result = apply_grace_marks(df, ld=ld, sports=sports, sports_marks=sports_marks)
        df = result["df"]
        status = result["status"]
        sports_remain_marks = result["sports_remain_marks"]

        if status == "FAIL":
            df.drop("Grace Marks", axis=1, inplace=True)
        else:
            uni = df["Grace Marks"].unique()
            if len(uni) == 1 and uni[0] == 0:
                df.drop("Grace Marks", axis=1, inplace=True)

        df.index.name = None
        total_marks = int(ceil(df.Total / 2).sum())

        # Add EVS
        evs_marks = PT_EVS_Assessment.objects.filter(
            student=student, exam__subject="EVS").aggregate(Sum("marks"))["marks__sum"]
        df = df.append(pd.Series({}, name="EVS", dtype=int))
        df.loc["EVS", "%"] = evs_marks
        df = df.fillna("")
        total_marks += evs_marks

        exam_conducted_of_marks = (len(df) - 1) * 100
        exam_conducted_of_marks += 50

        if sports:
            marks = " ".join([str(total_marks), "+", str(sports_remain_marks)])
            final_perc = round(
                (total_marks + sports_remain_marks) / exam_conducted_of_marks, 2)
            # CHECK, Round UP or Round Down
        else:
            marks = total_marks
            final_perc = round(total_marks / exam_conducted_of_marks, 2)
        final_perc *= 100

        # PT Grade
        pt_marks = PT_EVS_Assessment.objects.filter(
            student=student, exam__subject="PT").aggregate(Sum("marks"))["marks__sum"]
        pt_grade = get_pt_grade(pt_marks)

        non_int_color_cols = ["Unit 1", "Terminal", "Unit 2", "Final"]
        subjects = list(df.index)
        subjects.remove("EVS")
        html = (df.style
                .applymap(color_cell, subset=(subjects, non_int_color_cols))
                .applymap(color_sub_perc, subset=(subjects, ["%"]))
                .set_table_attributes("class='dataframe mystyle'")
                .set_precision(2)
                .render())

        env = Environment(loader=FileSystemLoader('./result'))
        template = env.get_template("student_result.html")
        template_vars = {
            "title": "Result",
            "name": student.name,
            "roll": student.roll_num,
            "division": division,
            "marks": marks,
            "final_perc": final_perc,
            "result": html,
            "status": status,
            "sports_remain_marks": sports_remain_marks,
            "sports": sports,
            "ld": ld,
            "pt_grade": pt_grade
        }
        html_out = template.render(template_vars)
        fp = "./student_results/{}/roll_{}.html".format(division, str(student.roll_num))
        default_storage.delete(fp)
        f = tempfile.TemporaryFile(mode="w+")
        f.write(html_out)
        default_storage.save(fp, f)
        print(fp)
        template_vars["id"] = index
        results.append(template_vars)

    env = Environment(loader=FileSystemLoader('./result'))
    template = env.get_template("inter_result.html")
    template_vars = {
        "title": "College Result",
        "division": division,
        "results": results
    }
    html_out = template.render(template_vars)
    file_path = "./results/intermediate_result.html"
    default_storage.delete(file_path)
    f = tempfile.TemporaryFile(mode="w+")
    f.write(html_out)
    file_name = default_storage.save(file_path, f)
    return file_path
def run(self, scaffold_stats, num_clusters, num_components, K, no_coverage,
        no_pca, iterations, genome_file, output_dir):
    """Calculate statistics for genomes.

    Parameters
    ----------
    scaffold_stats : ScaffoldStats
        Statistics for individual scaffolds.
    num_clusters : int
        Number of clusters to form.
    num_components : int
        Number of PCA components to consider.
    K : int
        K-mer size to use for calculating genomic signature.
    no_coverage : boolean
        Flag indicating if coverage information should be used during clustering.
    no_pca : boolean
        Flag indicating if PCA of genomic signature should be calculated.
    iterations : int
        Iterations of clustering to perform.
    genome_file : str
        Sequences being clustered.
    output_dir : str
        Directory to write results.
    """

    # get GC and mean coverage for each scaffold in genome
    self.logger.info('')
    self.logger.info(' Determining mean coverage and genomic signatures.')

    signatures = GenomicSignature(K)

    genome_stats = []
    signature_matrix = []
    seqs = seq_io.read(genome_file)
    for seq_id, seq in seqs.iteritems():
        stats = scaffold_stats.stats[seq_id]

        if not no_coverage:
            genome_stats.append((np_mean(stats.coverage)))
        else:
            genome_stats.append(())

        if K == 0:
            pass
        elif K == 4:
            signature_matrix.append(stats.signature)
        else:
            sig = signatures.seq_signature(seq)
            total_kmers = sum(sig)
            for i in xrange(0, len(sig)):
                sig[i] = float(sig[i]) / total_kmers
            signature_matrix.append(sig)

    # calculate PCA of tetranucleotide signatures
    if K != 0:
        if not no_pca:
            self.logger.info(' Calculating PCA of genomic signatures.')
            pc, variance = self.pca(signature_matrix)
            self.logger.info(' First %d PCs capture %.1f%% of the variance.'
                             % (num_components, sum(variance[0:num_components]) * 100))

            for i, stats in enumerate(genome_stats):
                genome_stats[i] = np_append(stats, pc[i][0:num_components])
        else:
            self.logger.info(' Using complete genomic signature.')
            for i, stats in enumerate(genome_stats):
                genome_stats[i] = np_append(stats, signature_matrix[i])

    # whiten data if feature matrix contains coverage and genomic signature data
    if not no_coverage and K != 0:
        print ' Whitening data.'
        genome_stats = whiten(genome_stats)
    else:
        genome_stats = np_array(genome_stats)

    # cluster
    self.logger.info(' Partitioning genome into %d clusters.' % num_clusters)

    bError = True
    while bError:
        try:
            bError = False
            _centroids, labels = kmeans2(genome_stats, num_clusters, iterations,
                                         minit='points', missing='raise')
        except ClusterError:
            bError = True

    for k in range(num_clusters):
        self.logger.info(' Placed %d sequences in cluster %d.' % (sum(labels == k), (k + 1)))

    # write out clusters
    genome_id = remove_extension(genome_file)
    for k in range(num_clusters):
        fout = open(os.path.join(output_dir, genome_id + '_c%d' % (k + 1) + '.fna'), 'w')
        for i in np_where(labels == k)[0]:
            seq_id = seqs.keys()[i]
            fout.write('>' + seq_id + '\n')
            fout.write(seqs[seq_id] + '\n')
        fout.close()
def kmeans(self, scaffold_stats, num_clusters, num_components, K, no_coverage,
           no_pca, iterations, genome_file, output_dir):
    """Cluster genome with k-means.

    Parameters
    ----------
    scaffold_stats : ScaffoldStats
        Statistics for individual scaffolds.
    num_clusters : int
        Number of clusters to form.
    num_components : int
        Number of PCA components to consider.
    K : int
        K-mer size to use for calculating genomic signature.
    no_coverage : boolean
        Flag indicating if coverage information should be used during clustering.
    no_pca : boolean
        Flag indicating if PCA of genomic signature should be calculated.
    iterations : int
        Iterations of clustering to perform.
    genome_file : str
        Sequences being clustered.
    output_dir : str
        Directory to write results.
    """

    # get GC and mean coverage for each scaffold in genome
    self.logger.info('Determining mean coverage and genomic signatures.')

    signatures = GenomicSignature(K)

    genome_stats = []
    signature_matrix = []
    seqs = seq_io.read(genome_file)
    for seq_id, seq in seqs.items():
        stats = scaffold_stats.stats[seq_id]

        if not no_coverage:
            genome_stats.append((np_mean(stats.coverage)))
        else:
            genome_stats.append(())

        if K == 0:
            pass
        elif K == 4:
            signature_matrix.append(stats.signature)
        else:
            sig = signatures.seq_signature(seq)
            total_kmers = sum(sig)
            for i in range(0, len(sig)):
                sig[i] = float(sig[i]) / total_kmers
            signature_matrix.append(sig)

    # calculate PCA of signatures
    if K != 0:
        if not no_pca:
            self.logger.info('Calculating PCA of genomic signatures.')
            pc, variance = self.pca(signature_matrix)
            self.logger.info(
                'First {:,} PCs capture {:.1f}% of the variance.'.format(
                    num_components, sum(variance[0:num_components]) * 100))

            for i, stats in enumerate(genome_stats):
                genome_stats[i] = np_append(stats, pc[i][0:num_components])
        else:
            self.logger.info('Using complete genomic signature.')
            for i, stats in enumerate(genome_stats):
                genome_stats[i] = np_append(stats, signature_matrix[i])

    # whiten data if feature matrix contains coverage and genomic signature data
    if not no_coverage and K != 0:
        self.logger.info('Whitening data.')
        genome_stats = whiten(genome_stats)
    else:
        genome_stats = np_array(genome_stats)

    # cluster
    self.logger.info(
        'Partitioning genome into {:,} clusters.'.format(num_clusters))

    bError = True
    while bError:
        try:
            bError = False
            _centroids, labels = kmeans2(genome_stats, num_clusters, iterations,
                                         minit='points', missing='raise')
        except ClusterError:
            bError = True

    for k in range(num_clusters):
        self.logger.info('Placed {:,} sequences in cluster {:,}.'.format(
            sum(labels == k), (k + 1)))

    # write out clusters
    genome_id = remove_extension(genome_file)
    for k in range(num_clusters):
        fout = open(
            os.path.join(output_dir, genome_id + '_c%d' % (k + 1) + '.fna'), 'w')
        for i in np_where(labels == k)[0]:
            seq_id = list(seqs.keys())[i]
            fout.write('>' + seq_id + '\n')
            fout.write(seqs[seq_id] + '\n')
        fout.close()