def gen_entity_pos(col_name):
    b = df[df[col_name] != ""].raw_address.values.astype(str)
    # Empty entities get the sentinel -2 (nothing to locate).
    df.loc[df[col_name] == "", 'start_%s' % col_name] = -2
    df.loc[df[col_name] == "", 'end_%s' % col_name] = -2
    # First pass: search for the entity delimited by spaces on both sides.
    df["mock_%s" % col_name] = " " + df[col_name] + " "
    a = df[df[col_name] != ""]["mock_%s" % col_name].values.astype(str)
    df.loc[(df[col_name] != ""), 'start_%s' % col_name] = find(b, a) + 1
    df.loc[(df['start_%s' % col_name] == 0), 'start_%s' % col_name] = -1
    print(df[(df['start_%s' % col_name] != -1) & (df['start_%s' % col_name] != -2)].shape)
    # Second pass: entity followed by a comma instead of a trailing space.
    b = df[df['start_%s' % col_name] == -1].raw_address.values.astype(str)
    df["mock_%s" % col_name] = " " + df[col_name] + ","
    a = df[df['start_%s' % col_name] == -1]["mock_%s" % col_name].values.astype(str)
    df.loc[df['start_%s' % col_name] == -1, 'start_%s' % col_name] = find(b, a) + 1
    df.loc[(df['start_%s' % col_name] == 0), 'start_%s' % col_name] = -1
    print(df[(df['start_%s' % col_name] != -1) & (df['start_%s' % col_name] != -2)].shape)
    # Last pass: bare substring search (catches entities at the string start).
    b = df[df['start_%s' % col_name] == -1].raw_address.values.astype(str)
    a = df[df['start_%s' % col_name] == -1][col_name].values.astype(str)
    df.loc[df['start_%s' % col_name] == -1, 'start_%s' % col_name] = find(b, a)
    print(df[(df['start_%s' % col_name] != -1) & (df['start_%s' % col_name] != -2)].shape)
    # print(df[(df['start_%s' % col_name] != -1) & (df['start_%s' % col_name] != -2)])
    # print(df[(df['start_%s' % col_name] != 0)])
    print("--------------")
    # e.g. "kembangan utara b,"
    df["end_%s" % col_name] = df["start_%s" % col_name] + df[col_name].str.len()
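# A minimal sketch (with made-up data) of the padded-token trick used above:
# searching for " token " matches only whole, space-delimited occurrences, and a
# bare-substring fallback catches tokens at the start or end of the string.
import numpy as np

raw = np.array(["jl kembangan utara 5", "budi raya 10"], dtype=str)
tok = np.array(["kembangan", "budi"], dtype=str)

padded = np.char.add(np.char.add(" ", tok), " ")  # " kembangan " / " budi "
start = np.char.find(raw, padded) + 1             # +1 skips the leading pad space
bare = np.char.find(raw, tok)                     # fallback: plain substring search
start = np.where(start == 0, bare, start)         # 0 here means "padded form not found"
print(start)                                      # [3 0]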
def _construct_features_array(self, soi):
    """
    Constructs features array.
    :return: numpy array for running the model.
    """
    shape = (len(soi), len(self.features_metadata))
    batch_encoded_features = np.zeros(shape)
    # first feature is the gc content in acceptor region (double acceptor window at the end)
    acceptors = [x[2 * self.acc_i:] for x in soi]
    batch_encoded_features[:, 0] = np.array(
        [self._count_gc_content(acceptor) for acceptor in acceptors])
    # second feature is gc content in intron region
    introns = [x[self.don_i:self.acc_i] for x in soi]
    batch_encoded_features[:, 1] = np.array(
        [self._count_gc_content(intron) for intron in introns])
    # get the list of bp index for each sequence of batch
    self.bp_indexes = self._get_bp_indexes_labranchor(soi)
    # slice out feature sequences
    # seqA = [seq[self.acc_i - 4 : self.acc_i + 6] for seq in soi]
    seqB = np.array([
        soi[j][int(self.bp_indexes[j]) - 15:int(self.bp_indexes[j]) + 6]
        for j in range(len(soi))
    ])
    B_i = 15
    # seqD = [seq[self.don_i - 3 : self.acc_i + 16] for seq in soi]
    # fill out the rest of the features (base-by-region features)
    for i in range(2, len(self.features_metadata)):
        # parse the current feature info
        (region, pos, nucl) = self.features_metadata[i]
        if region == 'seqD' or region == 'seqA':
            # decrement, since acc_i/don_i is pos = 1
            if pos > 0:
                pos -= 1
            # apply vectorized numpy operations
            if region == 'seqD':
                idx = self.don_i + int(pos)
            else:
                idx = self.acc_i + int(pos)
            feat_column = npc.find(soi, nucl, idx, idx + 1)
        else:
            idx = B_i + int(pos)
            feat_column = npc.find(seqB, nucl, idx, idx + 1)
        feat_column[feat_column > 1] = 1
        feat_column[feat_column == -1] = 0
        batch_encoded_features[:, i] = feat_column
    return batch_encoded_features
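# A small illustration (toy sequences) of the npc.find(..., idx, idx + 1) pattern
# above: restricting the search window to one character turns find() into a
# per-sequence "is nucleotide N at position idx?" test, returning the absolute
# hit index on a match and -1 otherwise.
import numpy as np
import numpy.core.defchararray as npc

seqs = np.array(["ACGT", "AGGT", "TCGA"], dtype=str)
idx = 1                                  # is 'C' the base at position 1?
col = npc.find(seqs, "C", idx, idx + 1)  # [1, -1, 1]
col[col > 1] = 1                         # any hit index -> 1
col[col == -1] = 0                       # miss -> 0
print(col)                               # [1 0 1]
# Caveat: a hit at idx == 0 would also map to 0; the donor/acceptor offsets used
# above appear large enough that this edge case should not arise in practice.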
def __init__(self, num_clus, file_id, samp_size):
    filename = 'elki_output/' + file_id + '/' + file_id + '-k' + str(num_clus) + '-samp' + str(samp_size) + '/elki-clusters.txt'
    reader = csv.reader(open(filename, "r"), delimiter=",")
    x = list(reader)
    # Cast to float so euclidean_distances below can consume the centers.
    self.cluster_centers = np.array(x).astype(float)
    self.num_clus = num_clus
    filename_full = 'elki_output/' + file_id + '/' + file_id + '-k' + str(num_clus) + '/full-elki-nosamp.txt'
    print(filename_full)
    # self.full_data = np.array(list(csv.reader(open(filename_full, "r"), delimiter="\n"))).astype(str)
    raw_data_ingest = np.array(list(csv.reader(open(filename_full, "r"), delimiter="\n")))
    # Keep only the lines that contain 'ID' (the data records).
    raw_data_str = raw_data_ingest[find(raw_data_ingest, 'ID') != -1]
    split_str = lambda x: x.split(" ")
    self.full_data = np.array(list(map(split_str, raw_data_str)))
    k = int(len(self.full_data) / num_clus - (len(self.full_data) % num_clus))
    print(k)
    self.k = k
    self.n_eq = (len(self.full_data) - (len(self.full_data) % k)) / k
    self.coords = self.full_data[:, 1:3].astype(float)[0:int(self.n_eq * k)]
    self.raw_coords = self.full_data[:, 1:3].astype(float)
    self.dist_mat = euclidean_distances(self.coords, self.cluster_centers)
def search():
    params = app.current_request.query_params or {}
    patterns = params.get('label-filter', '').split()
    all_labels = []
    for name in repo.ls():
        clct = repo / name
        labels = asarray(clct.ls(), dtype="U")
        for pattern in patterns:
            # case-insensitive substring match
            cond = find(char.lower(labels), pattern.lower()) != -1
            all_labels.extend(f'{name}/{l}' for l in labels[cond])
    return render_template('search-modal.html', labels=all_labels)
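# For reference, the case-insensitive filter above boils down to this
# (hypothetical labels):
import numpy as np

labels = np.asarray(["Cats", "dogs", "catalog"], dtype="U")
pattern = "CAT"
cond = np.char.find(np.char.lower(labels), pattern.lower()) != -1
print(labels[cond])  # ['Cats' 'catalog']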
def gen_entity_pos(col_name):
    df["mock_%s" % col_name] = " " + df[col_name] + " "
    a = df["mock_%s" % col_name].values.astype(str)
    b = df.raw_address.values.astype(str)
    df["start_%s" % col_name] = find(b, a) + 1
    df["mock2_%s" % col_name] = " " + df[col_name] + ","
    a = df["mock2_%s" % col_name].values.astype(str)
    df["start2_%s" % col_name] = find(b, a) + 1
    a = df[col_name].values.astype(str)
    df["cur_start_%s" % col_name] = find(b, a)  # + 1
    # e.g. "kembangan utara b,"
    df.loc[df['start_%s' % col_name] == 0, 'start_%s' % col_name] = df["start2_%s" % col_name]
    df.loc[df['start_%s' % col_name] == 0, 'start_%s' % col_name] = df["cur_start_%s" % col_name]
    df["end_%s" % col_name] = df["start_%s" % col_name] + df[col_name].str.len()
def re_gen_entity_pos(col_name):
    b = df[df['start_%s' % col_name] == -1].hash_raw_address.values.astype(str)
    df["mock_%s" % col_name] = " " + df[col_name] + " "
    a = df[df['start_%s' % col_name] == -1]["mock_%s" % col_name].values.astype(str)
    df.loc[df['start_%s' % col_name] == -1, 'temp_start_%s' % col_name] = find(b, a) + 1
    df.loc[(df['temp_start_%s' % col_name] > 0), 'start_%s' % col_name] = df['temp_start_%s' % col_name]
    print(df[(df['start_%s' % col_name] != -1) & (df['start_%s' % col_name] != -2)].shape)
    b = df[df['start_%s' % col_name] == -1].hash_raw_address.values.astype(str)
    df["mock_%s" % col_name] = " " + df[col_name] + ","
    a = df[df['start_%s' % col_name] == -1]["mock_%s" % col_name].values.astype(str)
    df.loc[df['start_%s' % col_name] == -1, 'temp_start_%s' % col_name] = find(b, a) + 1
    df.loc[(df['temp_start_%s' % col_name] > 0), 'start_%s' % col_name] = df['temp_start_%s' % col_name]
    print(df[(df['start_%s' % col_name] != -1) & (df['start_%s' % col_name] != -2)].shape)
    b = df[df['start_%s' % col_name] == -1].hash_raw_address.values.astype(str)
    a = df[df['start_%s' % col_name] == -1][col_name].values.astype(str)
    df.loc[df['start_%s' % col_name] == -1, 'start_%s' % col_name] = find(b, a)
    print(df[(df['start_%s' % col_name] != -1) & (df['start_%s' % col_name] != -2)].shape)
    check_hash = df[(df['start_%s' % col_name] == -1)][["POI", "hash_raw_address"]]
    check_hash.to_csv("check_hash.csv")
    print("--------------")
    # unmatched: 32671 -> 21035
    # e.g. "kembangan utara b,"
    df["end_%s" % col_name] = df["start_%s" % col_name] + df[col_name].str.len()
def trace_process(alignment_matrix):
    traceback_matrix_rows = np.zeros(alignment_matrix.shape)
    traceback_matrix_cols = np.zeros(alignment_matrix.shape)
    # Transition matrix (computed per row below)
    transition_submatrix = None
    for cl in range(1, alignment_matrix.shape[0]):
        this_row_best_score = np.ones(shape=(1, int(num_candidate_matches[cl]))) * (-np.inf)
        this_row_best_row_from = np.zeros(shape=(1, int(num_candidate_matches[cl])))
        this_row_best_col_from = np.zeros(shape=(1, int(num_candidate_matches[cl])))
        # For each row, compute the transition matrix including the bad-level
        # penalty (mirrors line 209 of calculateSequenceVV in the MATLAB code).
        # Note: the builtin min is needed here; np.min(a, b) treats b as an axis.
        for cfrom in range(min(max_sequential_bad, cl - 1)):
            # NOTE: the original reshape call was malformed; p_good is assumed
            # here to be an additive term applied after reshaping to (n_from, n_to).
            transition_submatrix = np.reshape(
                p_vals[cl, cfrom,
                       transition_info.perm_matrix[candidate_matches[cl - cfrom, :],
                                                   candidate_matches[cl, :]]]
                + np.sum(p_bad[(cl - cfrom):cl]),
                (int(num_candidate_matches[cl - cfrom]), int(num_candidate_matches[cl]))
            ) + p_good[cl - cfrom]
            # For each "to" state, find the best "from" state.
            scores = np.transpose(transition_submatrix
                                  + alignment_matrix[cl - cfrom, candidate_matches[cl - cfrom, :]])
            from_score = np.max(scores, axis=0)     # np.max returns values only;
            where_from = np.argmax(scores, axis=0)  # indices come from np.argmax
            # Find the "to" elements that beat the current best.
            better_score = from_score > this_row_best_score
            # Indices of the nonzero elements (MATLAB find -> np.flatnonzero).
            from_indices = np.flatnonzero(candidate_matches[cl - cfrom, :])
            # Update the best path so far.
            this_row_best_col_from[better_score] = from_indices[where_from[better_score]]
            this_row_best_row_from[better_score] = cl - cfrom
            this_row_best_score[better_score] = from_score[better_score]
        # Update the alignment and traceback matrices with the best options.
        alignment_matrix[cl, :] = this_row_best_score + alignment_matrix[cl, candidate_matches[cl, :]]
        traceback_matrix_rows[cl, candidate_matches[cl, :]] = this_row_best_row_from
        traceback_matrix_cols[cl, candidate_matches[cl, :]] = this_row_best_col_from
    # Find the starting point, analogous to the bottom-right cell in DTW.
    total_score = np.max(alignment_matrix[-1, :])
    kmer = np.argmax(alignment_matrix[-1, :])
    # ??? why take the log here
    alignment_matrix = alignment_matrix - np.log(alignment_matrix)
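# Two NumPy idioms assumed in the fixes above: np.max returns only the maxima,
# so the achieving indices must come from a separate np.argmax, and MATLAB-style
# find(v) (indices of nonzero entries) maps to np.flatnonzero.
import numpy as np

scores = np.array([[0.1, 0.9, 0.4],
                   [0.7, 0.2, 0.5]])
best = np.max(scores, axis=0)      # [0.7 0.9 0.5] -- best score per column
where = np.argmax(scores, axis=0)  # [1 0 1]       -- row that achieved it

v = np.array([0, 3, 0, 7])
print(np.flatnonzero(v))           # [1 3] -- indices of the nonzero elements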
def domain(namefile, proj=None, back="vishires", target=None):
    from netCDF4 import Dataset
    from myplot import getcoord2d, define_proj, makeplotres, simplinterv, getprefix, dumpbdy, getproj, latinterv, wrfinterv
    from mymath import max, min
    from matplotlib.pyplot import contourf, rcParams, pcolor
    from numpy.core.defchararray import find
    from numpy import arange
    ###
    nc = Dataset(namefile)
    ###
    if proj is None:
        proj = "ortho"
        # proj = getproj(nc)
    ###
    prefix = namefile[0] + namefile[1] + namefile[2]
    if prefix == "geo":
        [lon2d, lat2d] = getcoord2d(nc, nlat='XLAT_M', nlon='XLONG_M')
        var = 'HGT_M'
        zeplot = "domain"
    else:
        [lon2d, lat2d] = getcoord2d(nc)
        var = "HGT"
        zeplot = getprefix(nc) + "domain"
    ###
    lon2d = dumpbdy(lon2d, 5)
    lat2d = dumpbdy(lat2d, 5)
    if proj == "npstere":
        [wlon, wlat] = latinterv("North_Pole")
    elif proj in ["lcc", "laea"]:
        [wlon, wlat] = wrfinterv(lon2d, lat2d)
    else:
        [wlon, wlat] = simplinterv(lon2d, lat2d)
    ###
    m = define_proj(proj, wlon, wlat, back=back)
    x, y = m(lon2d, lat2d)
    ###
    what_I_plot = dumpbdy(nc.variables[var][0, :, :], 5)
    # levinterv = 250.
    # zelevels = arange(min(what_I_plot)-levinterv,max(what_I_plot)+levinterv,levinterv)
    zelevels = 30
    contourf(x, y, what_I_plot, zelevels)
    # pcolor(x,y,what_I_plot)  ## the grid lines show through too much!
    ###
    if not target:
        zeplot = namefile[0:find(namefile, 'wrfout')] + zeplot
    else:
        zeplot = target + "/" + zeplot
    ###
    pad_inches_value = 0.35
    makeplotres(zeplot, res=100., pad_inches_value=pad_inches_value)  # ,erase=True)
    ## a thumbnail
    makeplotres(zeplot, res=200., pad_inches_value=pad_inches_value, disp=False)
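# Note on the find(namefile, 'wrfout') idiom above (also used in winds() below):
# numpy.core.defchararray.find accepts a plain Python string too, returning a
# 0-d integer array that works as a slice bound. A quick check with a
# hypothetical path:
from numpy.core.defchararray import find

namefile = "/data/run1/wrfout_d01_2024"
print(find(namefile, 'wrfout'))              # 11
print(namefile[0:find(namefile, 'wrfout')])  # '/data/run1/'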
def main():
    parser = argparse.ArgumentParser(
        description='Check the output of Mothur pcr.seqs command.')
    parser.add_argument('-s', '--size', metavar='', required=True,
                        help='Specify sample size')
    parser.add_argument('-b', '--before', metavar='', required=True,
                        help='Specify fasta file that is input to pcr.seqs')
    parser.add_argument('-a', '--after', metavar='', required=True,
                        help='Specify fasta file output from pcr.seqs')
    parser.add_argument('-l', '--oligos', metavar='', required=True,
                        help='Specify oligos file')
    parser.add_argument('-g', '--group', metavar='', required=True,
                        help='Specify group file')
    parser.add_argument('-o', '--outfile', metavar='', required=True,
                        help='Specify path and name for output file')
    args = parser.parse_args()

    # Generate a random sample of integers.
    # Range of the random sample equals the number of reads in the pcr.seqs output fasta file
    random_sample = random.sample(range(round(file_len(args.after) / 2)), int(args.size))

    # We need to get the reads based on our random sample.
    # Then we need to get the sequence info for these IDs (sequence after pcr.seqs and before it)
    # Then we need to merge this info with the primer sequence info, and print this table
    d = {}  # Dictionary to hold random sample of read IDs and their sequences
    for i in random_sample:
        line = linecache.getline(args.after, i)  # Get the line corresponding to the random integer
        # If line is a read ID, make that the key. If not, make the next line the key.
        if line.startswith('>'):
            key = line.rstrip().split()[0][1:]
            key = key.split('|')[0]
            seq = linecache.getline(args.after, i + 1)
        else:
            key = linecache.getline(args.after, i + 1).split()[0][1:]
            key = key.split('|')[0]
            seq = linecache.getline(args.after, i + 2)
        d[key] = seq.strip()  # Key = read ID, value = sequence after pcr.seqs cmd was run

    # For the read IDs in dict, get the sequence before pcr.seqs command was run
    with open(args.before, 'r') as f:
        for line in f:
            if line.startswith('>'):
                if line.strip().split()[0][1:] in d.keys():
                    append_value(d, line.strip().split()[0][1:], next(f).strip())

    # For the read IDs in dict, get the primer that Mothur has identified in them
    with open(args.group, 'r') as f:
        for line in f:
            if line.strip().split()[0] in d.keys():
                primer = line.strip().split()[1]
                append_value(d, line.strip().split()[0], primer.split('.')[1])

    o = {}  # Dictionary to hold oligos file info (primer seqs)
    with open(args.oligos, 'r') as f:
        for line in f:
            if line.startswith('primer'):
                key = line.strip().split()[3]  # primer name
                fwd = line.strip().split()[1]  # forward primer seq
                rev = line.strip().split()[2]  # reverse primer seq
                rev_seq = Seq(rev)
                rev_compl = str(rev_seq.reverse_complement())  # Reverse complement of rev primer
                fwd_seq = Seq(fwd)
                fwd_compl = str(fwd_seq.reverse_complement())  # Reverse complement of fwd primer
                o[key] = fwd  # Key is primer name, 1st value is forward primer seq
                append_value(o, key, fwd_compl)  # 2nd value: rev compl of fwd primer
                append_value(o, key, rev)  # 3rd value: reverse primer
                append_value(o, key, rev_compl)  # 4th value: rev compl of rev primer

    reads = pd.DataFrame([(k, *v) for k, v in d.items()])
    reads.columns = ('id', 'after', 'before', 'primer_name')
    primers = pd.DataFrame([(k, *v) for k, v in o.items()])
    primers.columns = ('primer_name', 'fwd', 'fwd_rc', 'rev', 'rev_rc')

    # Merge the primer sequence into the sample of read IDs
    full = pd.merge(reads, primers, on='primer_name', how='left')

    # Get the fragment with primers trimmed off
    a = full.fwd.values.astype(str)
    b = full.before.values.astype(str)
    full = full.assign(start_fwd=find(b, a))  # Start of fwd primer sequence
    full['fwd_len'] = full['fwd'].str.len()  # Length of primer seq [start the fragment at start+len]
    full['start'] = full['start_fwd'] + full['fwd_len']  # This is where the primer-less fragment starts
    a = full.rev.values.astype(str)
    full = full.assign(end=find(b, a))  # Start of rev_compl primer sequence [end fragment 'up to' end]
    full['segment'] = full.apply(
        lambda x: x[2][x[10]:x[11]], axis=1
    )  # x[2] is the 'before' seq, x[10] is index after fwd primer ends, x[11] is index where rev primer begins
    full.loc[full['start_fwd'] == -1,
             'segment'] = "Forward primer not found"  # Indicate if the forward primer was not found
    new_full = full[[
        'id', 'before', 'fwd', 'rev_rc', 'after', 'segment', 'primer_name',
        'fwd_rc', 'rev', 'start_fwd', 'start', 'end'
    ]]
    new_full.to_csv(args.outfile, header=True, index=False, sep='\t')
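# A toy version (invented sequences) of the primer-trimming arithmetic above:
# the fragment starts right after the forward primer and ends where the
# reverse primer sequence begins.
import numpy as np

before = np.array(["AAACGTTTTGGCAT"], dtype=str)
fwd = np.array(["ACG"], dtype=str)
rev = np.array(["GGC"], dtype=str)

start = np.char.find(before, fwd) + np.char.str_len(fwd)  # first base after fwd primer
end = np.char.find(before, rev)                           # first base of rev primer
print([s[i:j] for s, i, j in zip(before, start, end)])    # ['TTTT']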
def _construct_features_array(self, soi):
    """
    Constructs features array.
    :return: numpy array for running the model.
    """
    shape = (len(soi), len(self.features_metadata))
    batch_encoded_features = np.zeros(shape)
    # first feature is the gc content in acceptor region (double acceptor window at the end)
    acceptors = [x[2 * self.acc_i:] for x in soi]
    batch_encoded_features[:, 0] = np.array(
        [self._count_gc_content(acceptor) for acceptor in acceptors])
    # second feature is gc content in intron region
    introns = [x[self.don_i:self.acc_i] for x in soi]
    batch_encoded_features[:, 1] = np.array(
        [self._count_gc_content(intron) for intron in introns])
    # slice out feature sequences
    # seqA = [seq[self.acc_i - 4 : self.acc_i + 6] for seq in soi]
    seqB = np.array([
        soi[j][int(self.bp_indexes[j]) - 15:int(self.bp_indexes[j]) + 6]
        for j in range(len(soi))
    ])
    B_i = 15
    # seqD = [seq[self.don_i - 3 : self.acc_i + 16] for seq in soi]
    # fill out the rest of the features (base-by-region features)
    for i in range(2, len(self.features_metadata)):
        # parse the current feature info
        (region, pos, nucl) = self.features_metadata[i]
        if region == 'seqD' or region == 'seqA':
            # decrement, since acc_i/don_i is pos = 1
            if pos > 0:
                pos -= 1
            # apply vectorized numpy operations
            if region == 'seqD':
                idx = self.don_i + int(pos)
            else:
                idx = self.acc_i + int(pos)
            feat_column = npc.find(soi, nucl, idx, idx + 1)
        else:
            idx = B_i + int(pos)
            feat_column = npc.find(seqB, nucl, idx, idx + 1)
        feat_column[feat_column > 1] = 1
        feat_column[feat_column == -1] = 0
        batch_encoded_features[:, i] = feat_column

        # Earlier per-sequence implementation, kept for reference:
        # for j in range(len(soi)):
        #     if region == 'seqB':
        #         i_oi = int(self.bp_indexes[j]) + int(pos)
        #         if soi[j][i_oi].upper() == nucl:
        #             batch_encoded_features[j, i] = 1
        #     else:
        #         if region == 'seqA' and soi[j][(self.acc_i + int(pos))].upper() == nucl:
        #             batch_encoded_features[j, i] = 1
        #         elif region == 'seqD' and soi[j][(self.don_i + int(pos))].upper() == nucl:
        #             batch_encoded_features[j, i] = 1

    # Parallel variants, also kept for reference:
    # executor = concurrent.futures.ProcessPoolExecutor(10)
    # futures = [executor.submit(work_seq_on_feature, seqA[j], seqB[j], seqD[j], region, pos, nucl, j) for j in range(len(soi))]
    # concurrent.futures.wait(futures)
    # pool = ProcessPool(nodes=10)
    # feat_column = np.array(pool.map(work_seq_on_feature, seqA, seqB, seqD,
    #                                 [region for i in range(len(soi))], [pos for i in range(len(soi))],
    #                                 [nucl for i in range(len(soi))]))
    # for future in futures:
    #     (seq_idx, value) = future.result()
    #     if value != 0:
    #         feat_column[seq_idx] = value
    return batch_encoded_features
def match(x, pat):
    # True for each row of x that contains pat in at least one element
    # (find returns -1 on a miss, so +1 makes misses falsy).
    return np.any((npchar.find(x, pat) + 1).astype(bool), axis=1)
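# Usage sketch for match() on a 2-D string array:
import numpy as np
import numpy.core.defchararray as npchar

x = np.array([["foo", "bar"], ["baz", "qux"]])
print(match(x, "ba"))  # [ True  True]
print(match(x, "oo"))  # [ True False]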
def get_question_type_stat(keyword, questions, f1_em):
    questions_ind = get_samples_with_conditions(
        questions, lambda s: find(s[:], keyword) >= 0)
    f1_em_type = [f1_em[i] for i in questions_ind]
    print(np.mean(f1_em_type, axis=0))
    print((1.0 * len(questions_ind)) / len(questions))
def winds(namefile,
          nvert,
          proj=None,
          back=None,
          target=None,
          stride=3,
          numplot=2,
          var=None,
          colorb="def",
          winds=True,
          addchar=None,
          interv=[0, 1],
          vmin=None,
          vmax=None,
          tile=False,
          zoom=None,
          display=True,
          itstep=None,
          hole=False,
          save="gui",
          anomaly=False,
          var2=None,
          ndiv=10,
          first=1,
          mult=1.,
          zetitle="fill"):

    ####################################################################################################################
    ### Colorbars http://www.scipy.org/Cookbook/Matplotlib/Show_colormaps?action=AttachFile&do=get&target=colormaps3.png

    #################################
    ### Load libraries and functions
    from netCDF4 import Dataset
    from myplot import getcoord2d,define_proj,makeplotres,simplinterv,vectorfield,ptitle,latinterv,getproj,wrfinterv,dumpbdy,\
                       fmtvar,definecolorvec,defcolorb,getprefix,putpoints,calculate_bounds,errormess,definesubplot,\
                       zoomset,getcoorddef,getwinddef,whatkindfile,reducefield,bounds,getstralt,getfield,smooth,nolow,\
                       getname,localtime,polarinterv
    from mymath import deg, max, min, mean
    from matplotlib.pyplot import contour, contourf, subplot, figure, rcParams, savefig, colorbar, pcolor, show
    from matplotlib.cm import get_cmap
    import numpy as np
    from numpy.core.defchararray import find

    ######################
    ### Load NETCDF object
    nc = Dataset(namefile)

    ##################################
    ### Initial checks and definitions
    typefile = whatkindfile(nc)                               ## TYPEFILE
    if var not in nc.variables:
        var = False                                           ## VAR
    if winds:                                                 ## WINDS
        [uchar, vchar, metwind] = getwinddef(nc)
        if uchar == 'not found':
            winds = False
    if not var and not winds:
        errormess("please set at least winds or var", printvar=nc.variables)
    [lon2d, lat2d] = getcoorddef(nc)                          ## COORDINATES, could be moved below
    if proj is None:
        proj = getproj(nc)                                    ## PROJECTION

    ##########################
    ### Define plot boundaries
    ### todo: possible areas in latinterv in argument (ex: "Far_South_Pole")
    if proj in ["npstere", "spstere"]:
        [wlon, wlat] = polarinterv(lon2d, lat2d)
    elif proj in ["lcc", "laea"]:
        [wlon, wlat] = wrfinterv(lon2d, lat2d)
    else:
        [wlon, wlat] = simplinterv(lon2d, lat2d)
    if zoom:
        [wlon, wlat] = zoomset(wlon, wlat, zoom)

    #########################################
    ### Name for title and graphics save file
    basename = getname(var=var, winds=winds, anomaly=anomaly)
    basename = basename + getstralt(nc, nvert)  ## can be moved elsewhere for a more generic routine

    ##################################
    ### Open a figure and set subplots
    fig = figure()
    subv, subh = definesubplot(numplot, fig)

    #################################
    ### Time loop for plotting device
    found_lct = False
    nplot = 1
    itime = first
    error = False
    if itstep is None and numplot > 0:
        itstep = int(24. / numplot)
    elif numplot <= 0:
        itstep = 1

    while error is False:

        ### Which local time ?
        ltst = localtime(interv[0] + itime * interv[1], 0.5 * (wlon[0] + wlon[1]))

        ### General plot settings
        # print(itime, int(ltst), numplot, nplot)
        if numplot >= 1:
            if nplot > numplot:
                break
            if numplot > 1:
                if typefile not in ['geo']:
                    subplot(subv, subh, nplot)
            found_lct = True
        ### If only one local time is requested (numplot < 0)
        elif numplot <= 0:
            if int(ltst) + numplot != 0:
                itime += 1
                if found_lct is True:
                    break      ## because it means LT was found at previous iteration
                else:
                    continue   ## continue to iterate to find the correct LT
            else:
                found_lct = True

        ### Map projection
        m = define_proj(proj, wlon, wlat, back=back)
        x, y = m(lon2d, lat2d)

        #### Contour plot
        if var2:
            what_I_contour, error = reducefield(getfield(nc, var2), d4=itime, d3=nvert)
            if not error:
                if typefile in ['mesoapi', 'meso']:
                    what_I_contour = dumpbdy(what_I_contour, 6)
                zevmin, zevmax = calculate_bounds(what_I_contour)
                zelevels = np.linspace(zevmin, zevmax, num=20)
                if var2 == 'HGT':
                    zelevels = np.arange(-10000., 30000., 2000.)
                contour(x, y, what_I_contour, zelevels, colors='k', linewidths=0.33)  # colors='w' , alpha=0.5
            else:
                errormess("There is an error in reducing field !")

        #### Shaded plot
        if var:
            what_I_plot, error = reducefield(getfield(nc, var), d4=itime, d3=nvert)
            what_I_plot = what_I_plot * mult
            if not error:
                fvar = var
                ###
                if anomaly:
                    what_I_plot = 100. * ((what_I_plot / smooth(what_I_plot, 12)) - 1.)
                    fvar = 'anomaly'
                # if mult != 1:
                #     fvar = str(mult) + "*" + var
                ###
                if typefile in ['mesoapi', 'meso']:
                    what_I_plot = dumpbdy(what_I_plot, 6)
                zevmin, zevmax = calculate_bounds(what_I_plot, vmin=vmin, vmax=vmax)
                if colorb in ["def", "nobar"]:
                    palette = get_cmap(name=defcolorb(fvar))
                else:
                    palette = get_cmap(name=colorb)
                if not tile:
                    if not hole:
                        what_I_plot = bounds(what_I_plot, zevmin, zevmax)
                    # zelevels = np.linspace(zevmin*(1. + 1.e-7),zevmax*(1. - 1.e-7))  #,num=20)
                    zelevels = np.linspace(zevmin, zevmax)
                    contourf(x, y, what_I_plot, zelevels, cmap=palette)
                else:
                    if hole:
                        what_I_plot = nolow(what_I_plot)
                    pcolor(x, y, what_I_plot, cmap=palette,
                           vmin=zevmin, vmax=zevmax)
                if colorb != 'nobar' and var != 'HGT':
                    colorbar(fraction=0.05, pad=0.03, format=fmtvar(fvar),
                             ticks=np.linspace(zevmin, zevmax, ndiv + 1),
                             extend='neither', spacing='proportional')  # both min max neither
            else:
                errormess("There is an error in reducing field !")

        ### Vector plot
        if winds:
            vecx, error = reducefield(getfield(nc, uchar), d4=itime, d3=nvert)
            vecy, error = reducefield(getfield(nc, vchar), d4=itime, d3=nvert)
            if not error:
                if typefile in ['mesoapi', 'meso']:
                    [vecx, vecy] = [dumpbdy(vecx, 6, stag=uchar), dumpbdy(vecy, 6, stag=vchar)]
                    key = True
                elif typefile in ['gcm']:
                    key = False
                if metwind:
                    [vecx, vecy] = m.rotate_vector(vecx, vecy, lon2d, lat2d)
                if var == False:
                    colorvec = definecolorvec(back)
                else:
                    colorvec = definecolorvec(colorb)
                vectorfield(vecx, vecy,
                            x, y, stride=stride, csmooth=2,
                            # scale=15., factor=300., color=colorvec, key=key)
                            scale=20., factor=250., color=colorvec, key=key)  # 200.
                ## or csmooth=stride

        ### Next subplot
        plottitle = basename
        if typefile in ['mesoapi', 'meso']:
            if addchar:
                plottitle = plottitle + addchar + "_LT" + str(ltst)
            else:
                plottitle = plottitle + "_LT" + str(ltst)
        if mult != 1:
            plottitle = str(mult) + "*" + plottitle
        if zetitle != "fill":
            plottitle = zetitle
        ptitle(plottitle)
        itime += itstep
        nplot += 1

    ##########################################################################
    ### Save the figure in a file in the data folder or a user-defined folder
    if typefile in ['meso', 'mesoapi']:
        prefix = getprefix(nc)
    elif typefile in ['gcm']:
        prefix = 'LMD_GCM_'
    else:
        prefix = ''
    ###
    zeplot = prefix + basename
    if addchar:
        zeplot = zeplot + addchar
    if numplot <= 0:
        zeplot = zeplot + "_LT" + str(abs(numplot))
    ###
    if not target:
        zeplot = namefile[0:find(namefile, 'wrfout')] + zeplot
    else:
        zeplot = target + "/" + zeplot
    ###
    if found_lct:
        pad_inches_value = 0.35
        if save == 'png':
            if display:
                makeplotres(zeplot, res=100., pad_inches_value=pad_inches_value)  # ,erase=True)
            ## a thumbnail
            makeplotres(zeplot, res=200., pad_inches_value=pad_inches_value, disp=False)
        elif save in ['eps', 'svg', 'pdf']:
            makeplotres(zeplot, pad_inches_value=pad_inches_value, disp=False, ext=save)
        elif save == 'gui':
            show()
        else:
            print("save mode not supported. using gui instead.")
            show()
    else:
        print("Local time not found")

    ###############
    ### Now the end
    return zeplot
df = pd.read_csv(dataset, usecols=cols)
# df = pd.read_csv(train_file, usecols=cols, nrows=5)
split_data = df["POI/street"].str.split("/", n=1, expand=True)
df["POI"] = split_data[0]
df["street"] = split_data[1]
df['POI_in'] = df.apply(lambda x: x["POI"] in x["raw_address"], axis=1)
df['street_in'] = df.apply(lambda x: x["street"] in x["raw_address"], axis=1)

# todo street entity
df["mock_street"] = " " + df["street"] + " "
a = df.mock_street.values.astype(str)
b = df.raw_address.values.astype(str)
df["start_street"] = find(b, a) + 1
df["mock2_street"] = " " + df["street"] + ","
a = df.mock2_street.values.astype(str)
df["start2_street"] = find(b, a) + 1
a = df.street.values.astype(str)
df["cur_start_street"] = find(b, a)  # + 1
# e.g. "kembangan utara b,"
df.loc[df['start_street'] == 0, 'start_street'] = df["start2_street"]
df.loc[df['start_street'] == 0, 'start_street'] = df["cur_start_street"]
df["end_street"] = df["start_street"] + df["street"].str.len()


def gen_entity_pos(col_name):
    ...  # body truncated in the original snippet; a full definition appears earlier in this section
def recipes_containing(self, sub: str):
    # boolean mask: True where the recipe name contains sub
    return defchararray.find(self.rec_names, sub) != -1
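# Usage sketch (hypothetical recipe names):
import numpy as np
from numpy.core import defchararray

rec_names = np.array(["pancakes", "banana bread", "omelette"])
mask = defchararray.find(rec_names, "an") != -1
print(rec_names[mask])  # ['pancakes' 'banana bread']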