def gen_entity_pos(col_name):
    """Compute 1-based start/end positions of the entity stored in column
    ``col_name`` within the module-level ``df.raw_address``, using the
    vectorised numpy.char ``find`` over progressively looser patterns.

    Sentinels written to ``start_<col>`` / ``end_<col>``:
      -2  entity value is empty,
      -1  entity not found in the raw address.
    """
    # Haystacks: raw addresses of the rows that actually have an entity value.
    b = df[df[col_name] != ""].raw_address.values.astype(str)
    # Rows with an empty entity get the -2 sentinel.
    df.loc[df[col_name] == "", 'start_%s' % col_name] = -2
    df.loc[df[col_name] == "", 'end_%s' % col_name] = -2

    # Pass 1: match " entity " (whole word); the +1 skips the leading pad.
    df["mock_%s" % col_name] = " " + df[col_name] + " "
    a = df[df[col_name] != ""]["mock_%s" % col_name].values.astype(str)
    df.loc[(df[col_name] != ""), 'start_%s' % col_name] = find(b, a) + 1
    # find() == -1 (miss) became 0 after the +1; remap it to the -1 sentinel.
    df.loc[(df['start_%s' % col_name] == 0), 'start_%s' % col_name] = -1
    print(df[(df['start_%s' % col_name] != -1)
             & (df['start_%s' % col_name] != -2)].shape)

    # Pass 2: retry the misses with " entity," (entity before a comma,
    # e.g. "kembangan utara b,").
    b = df[df['start_%s' % col_name] == -1].raw_address.values.astype(str)
    df["mock_%s" % col_name] = " " + df[col_name] + ","
    a = df[df['start_%s' % col_name] == -1]["mock_%s" %
                                            col_name].values.astype(str)
    df.loc[df['start_%s' % col_name] == -1,
           'start_%s' % col_name] = find(b, a) + 1
    df.loc[(df['start_%s' % col_name] == 0), 'start_%s' % col_name] = -1
    print(df[(df['start_%s' % col_name] != -1)
             & (df['start_%s' % col_name] != -2)].shape)

    # Pass 3: last resort, bare substring match with no padding (and no +1,
    # so a remaining miss keeps the raw -1 from find()).
    b = df[df['start_%s' % col_name] == -1].raw_address.values.astype(str)
    a = df[df['start_%s' % col_name] == -1][col_name].values.astype(str)
    df.loc[df['start_%s' % col_name] == -1, 'start_%s' % col_name] = find(b, a)
    print(df[(df['start_%s' % col_name] != -1)
             & (df['start_%s' % col_name] != -2)].shape)
    print("--------------")
    # End position = start + entity length.
    df["end_%s" %
       col_name] = df["start_%s" % col_name] + df[col_name].str.len()
Exemple #2
0
    def _construct_features_array(self, soi):
        """
        Construct the hand-crafted feature matrix for a batch of sequences.

        :param soi: sequences of interest (strings), each indexable by
            ``self.acc_i`` / ``self.don_i``.
        :return: numpy array of shape (len(soi), n_features) for running the
            model.
        """

        shape = (len(soi), len(self.features_metadata))
        batch_encoded_features = np.zeros(shape)

        # first feature is the gc content in acceptor region (double acceptor window at the end)
        acceptors = [x[2 * self.acc_i:] for x in soi]
        batch_encoded_features[:, 0] = np.array(
            [self._count_gc_content(acceptor) for acceptor in acceptors])
        # second feature is gc content in intron region
        introns = [x[self.don_i:self.acc_i] for x in soi]
        batch_encoded_features[:, 1] = np.array(
            [self._count_gc_content(intron) for intron in introns])

        # get the list of branch-point (bp) indexes for each sequence of batch
        self.bp_indexes = self._get_bp_indexes_labranchor(soi)
        # slice out a window around each branch point: 15 bases up, 6 down
        seqB = np.array([
            soi[j][int(self.bp_indexes[j]) - 15:int(self.bp_indexes[j]) + 6]
            for j in range(len(soi))
        ])
        B_i = 15  # branch-point position inside the seqB window

        # fill out the rest of the features (base-by-region features)
        for i in range(2, len(self.features_metadata)):
            # parse the current feature info
            (region, pos, nucl) = self.features_metadata[i]
            if (region == 'seqD' or region
                    == 'seqA'):  # decrement, since acc_i/don_i is pos = 1
                if pos > 0:
                    pos -= 1
                # apply vectorized numpy operations
                if region == 'seqD':
                    idx = self.don_i + int(pos)
                else:
                    idx = self.acc_i + int(pos)
                # find() over the single slot [idx, idx+1): idx on match, -1 on miss
                feat_column = npc.find(soi, nucl, idx, idx + 1)
            else:
                idx = B_i + int(pos)
                feat_column = npc.find(seqB, nucl, idx, idx + 1)

            # Binarise: match index > 1 -> 1, miss (-1) -> 0.
            # NOTE(review): a match exactly at string position 0 yields
            # find() == 0, which this mapping encodes as "absent" — confirm
            # idx can never be 0 here.
            feat_column[feat_column > 1] = 1
            feat_column[feat_column == -1] = 0
            batch_encoded_features[:, i] = feat_column

        return batch_encoded_features
    def __init__(self, num_clus, file_id, samp_size):
        """Load ELKI clustering output for ``file_id``.

        :param num_clus: number of clusters (k) used in the ELKI run.
        :param file_id: dataset identifier used to build the file paths.
        :param samp_size: sample size encoded in the cluster-centers path.
        """
        # Cluster centers produced by the sampled ELKI run.
        filename = 'elki_output/' + file_id + '/' + file_id + '-k' + str(num_clus) + '-samp' + str(samp_size) + '/elki-clusters.txt'
        reader = csv.reader(open(filename, "r"), delimiter=",")
        x = list(reader)
        self.cluster_centers = np.array(x).astype(str)
        self.num_clus = num_clus

        # Full (non-sampled) dataset dump from the same ELKI run.
        filename_full = 'elki_output/' + file_id + '/' + file_id + '-k' + str(num_clus) + '/full-elki-nosamp.txt'
        print(filename_full)

        # Keep only rows carrying an "ID" marker, then split each
        # space-separated record into its fields.
        raw_data_ingest = np.array(list(csv.reader(open(filename_full, "r"), delimiter="\n")))
        raw_data_str = raw_data_ingest[find(raw_data_ingest, 'ID') != -1]

        split_str = lambda x: x.split(" ")
        self.full_data = np.array(list(map(split_str, raw_data_str)))

        # Target chunk size k; n_eq is how many full chunks of size k fit.
        k = int(len(self.full_data)/num_clus - (len(self.full_data) % num_clus) )
        print(k)
        self.k = k
        self.n_eq = (len(self.full_data) - (len(self.full_data) % k))/k

        # Columns 1:3 hold the 2-D coordinates; coords is truncated to a
        # multiple of k, raw_coords keeps every row.
        self.coords = self.full_data[:, 1:3].astype(float)[0:int(self.n_eq * k)]
        self.raw_coords = self.full_data[:, 1:3].astype(float)

        # NOTE(review): cluster_centers is str dtype here; euclidean_distances
        # presumably needs numeric input — confirm it converts downstream.
        self.dist_mat = euclidean_distances(self.coords, self.cluster_centers)
Exemple #4
0
def search():
    """Render the search modal with every label matching the query filter.

    Reads the ``label-filter`` query parameter (whitespace-separated,
    case-insensitive substring patterns) and collects matching labels from
    each collection in ``repo`` as "collection/label" strings.
    """
    query = app.current_request.query_params or {}
    patterns = query.get('label-filter', '').split()
    matches = []
    for name in repo.ls():
        clct = repo / name
        labels = asarray(clct.ls(), dtype="U")
        lowered = char.lower(labels)
        for pattern in patterns:
            hits = labels[find(lowered, pattern.lower()) != -1]
            matches.extend(f'{name}/{l}' for l in hits)
    return render_template('search-modal.html', labels=matches)
Exemple #5
0
def gen_entity_pos(col_name):
    """Whole-frame variant: compute 1-based start/end of the ``col_name``
    entity inside ``df.raw_address`` in one vectorised pass, trying three
    patterns of decreasing strictness.
    """
    # Pattern 1: " entity " (whole word); the +1 skips the leading pad space.
    df["mock_%s" % col_name] = " " + df[col_name] + " "
    a = df["mock_%s" % col_name].values.astype(str)
    b = df.raw_address.values.astype(str)
    df["start_%s" % col_name] = find(b, a) + 1

    # Pattern 2: " entity," (entity directly followed by a comma).
    df["mock2_%s" % col_name] = " " + df[col_name] + ","
    a = df["mock2_%s" % col_name].values.astype(str)
    df["start2_%s" % col_name] = find(b, a) + 1

    # Pattern 3: bare substring match (no +1, so -1 still marks a miss).
    a = df[col_name].values.astype(str)
    df["cur_start_%s" % col_name] = find(b, a)  # + 1

    # Fall through: 0 means the previous pattern missed (e.g. cases like
    # "kembangan utara b,") — take pattern 2's result, then pattern 3's.
    df.loc[df['start_%s' % col_name] == 0,
           'start_%s' % col_name] = df["start2_%s" % col_name]
    df.loc[df['start_%s' % col_name] == 0,
           'start_%s' % col_name] = df["cur_start_%s" % col_name]
    # End position = start + entity length.
    df["end_%s" %
       col_name] = df["start_%s" % col_name] + df[col_name].str.len()
def re_gen_entity_pos(col_name):
    """Retry position extraction for rows still marked -1 (not found),
    matching against ``hash_raw_address`` instead of the raw address.
    Rows that remain unresolved are dumped to check_hash.csv for manual
    inspection.
    """
    # Retry 1: " entity " against the hashed address; +1 skips the pad.
    b = df[df['start_%s' % col_name] == -1].hash_raw_address.values.astype(str)
    df["mock_%s" % col_name] = " " + df[col_name] + " "
    a = df[df['start_%s' % col_name] == -1]["mock_%s" %
                                            col_name].values.astype(str)
    df.loc[df['start_%s' % col_name] == -1,
           'temp_start_%s' % col_name] = find(b, a) + 1
    # Only accept real hits (temp > 0); misses keep the -1 sentinel.
    df.loc[(df['temp_start_%s' % col_name] > 0),
           'start_%s' % col_name] = df['temp_start_%s' % col_name]
    print(df[(df['start_%s' % col_name] != -1)
             & (df['start_%s' % col_name] != -2)].shape)

    # Retry 2: " entity," (entity directly followed by a comma).
    b = df[df['start_%s' % col_name] == -1].hash_raw_address.values.astype(str)
    df["mock_%s" % col_name] = " " + df[col_name] + ","
    a = df[df['start_%s' % col_name] == -1]["mock_%s" %
                                            col_name].values.astype(str)
    df.loc[df['start_%s' % col_name] == -1,
           'temp_start_%s' % col_name] = find(b, a) + 1
    df.loc[(df['temp_start_%s' % col_name] > 0),
           'start_%s' % col_name] = df['temp_start_%s' % col_name]
    print(df[(df['start_%s' % col_name] != -1)
             & (df['start_%s' % col_name] != -2)].shape)

    # Retry 3: bare substring match (no padding, no +1).
    b = df[df['start_%s' % col_name] == -1].hash_raw_address.values.astype(str)
    a = df[df['start_%s' % col_name] == -1][col_name].values.astype(str)
    df.loc[df['start_%s' % col_name] == -1, 'start_%s' % col_name] = find(b, a)
    print(df[(df['start_%s' % col_name] != -1)
             & (df['start_%s' % col_name] != -2)].shape)
    # Dump the still-unmatched rows for manual inspection.
    check_hash = (df[(df['start_%s' %
                         col_name] == -1)][["POI", "hash_raw_address"]])
    check_hash.to_csv("check_hash.csv")
    print("--------------")  # unmatch: 32671 -> 21035
    # End position = start + entity length.
    df["end_%s" %
       col_name] = df["start_%s" % col_name] + df[col_name].str.len()
def trace_process(alignment_matrix):
    """Dynamic-programming traceback construction over candidate k-mer
    matches (apparently a port of the MATLAB ``calculateSequenceVV`` routine).

    Relies on module-level state: ``num_candidate_matches``,
    ``candidate_matches``, ``max_sequential_bad``, ``p_vals``, ``p_bad``,
    ``p_good`` and ``transition_info``.
    """

    traceback_matrix_rows = np.zeros(alignment_matrix.shape)
    traceback_matrix_cols = np.zeros(alignment_matrix.shape)

    # Build the transition matrix row by row.
    transition_submatrix = None
    for cl in range(1, alignment_matrix.shape[0]):
        this_row_best_score = np.ones(
            shape=(1, int(num_candidate_matches[(cl)]))) * (-np.inf)
        this_row_best_row_from = np.zeros(
            shape=(1, int(num_candidate_matches[cl])))
        this_row_best_col_from = np.zeros(
            shape=(1, int(num_candidate_matches[cl])))
        # For each source row compute the transition matrix, including the
        # bad-level penalty (cf. MATLAB calculateSequenceVV, line 209).
        # NOTE(review): np.min(a, b) interprets b as the *axis* argument;
        # the builtin min(max_sequential_bad, cl - 1) is almost certainly
        # what was meant here.
        for cfrom in range(np.min(max_sequential_bad, cl - 1)):
            # NOTE(review): np.reshape does not accept this 4-argument
            # MATLAB-style call; it needs a single shape tuple.
            transition_submatrix=np.reshape(p_vals[cl,cfrom,transition_info.perm_matrix[candidate_matches[cl-cfrom,:],candidate_matches[cl,:]]]+\
                np.sum(p_bad[(cl-cfrom):cl]),num_candidate_matches[cl-cfrom],num_candidate_matches[cl],p_good[cl-cfrom])
            # For every "to" state, find the best "from" state.
            # NOTE(review): np.max returns a single array, not a
            # (max, argmax) pair — this unpacking will fail; np.max +
            # np.argmax was presumably intended.
            from_score, where_from = np.max(
                np.transpose(transition_submatrix +
                             alignment_matrix[cl - cfrom,
                                              candidate_matches[cl -
                                                                cfrom, :]]),
                axis=0)
            # "to" entries that beat the current best.
            better_score = from_score > this_row_best_score
            # Indices of the non-zero candidate-match elements.
            from_indices = find(candidate_matches[cl - cfrom, :])
            # Record the best path found so far.
            this_row_best_col_from[better_score] = from_indices[
                where_from[better_score]]
            this_row_best_row_from[better_score] = cl - cfrom
            this_row_best_score[better_score] = from_score[better_score]
        # Update the alignment and traceback matrices with the best option.
        alignment_matrix[cl, :] = this_row_best_score + alignment_matrix[
            cl, candidate_matches[cl, :]]
        traceback_matrix_rows[cl, candidate_matches[
            cl, :]] = this_row_best_row_from
        traceback_matrix_cols[cl, candidate_matches[
            cl, :]] = this_row_best_col_from

    # Find the starting point (like the bottom-right cell in DTW).
    # NOTE(review): same np.max unpacking problem as above.
    total_score, kmer = np.max(alignment_matrix[-1, :])
    # Unexplained log-rescaling kept as in the original ("why the log?").
    alignment_matrix = alignment_matrix - np.log(alignment_matrix)
Exemple #8
0
def domain (namefile,proj=None,back="vishires",target=None): 
    """Plot the domain terrain height of a WRF / geogrid netCDF file.

    :param namefile: path to the netCDF file ("geo*" files use geogrid
        variable names, anything else the WRF names).
    :param proj: map projection name; defaults to "ortho" when None.
    :param back: background map passed to define_proj.
    :param target: output folder; when falsy the plot is saved next to the
        input file (path prefix up to "wrfout").
    """
    from netCDF4 import Dataset
    from myplot import getcoord2d,define_proj,makeplotres,simplinterv,getprefix,dumpbdy,latinterv,wrfinterv
    from matplotlib.pyplot import contourf
    from numpy.core.defchararray import find
    ###
    nc  = Dataset(namefile)
    ###
    if proj is None:  proj = "ortho"
    ###
    # Geogrid ("geo_em.*") files store coordinates and terrain under
    # different variable names than wrfout files.
    prefix = namefile[:3]
    if prefix == "geo":
        [lon2d,lat2d] = getcoord2d(nc,nlat='XLAT_M',nlon='XLONG_M')
        var = 'HGT_M'
        zeplot = "domain"
    else:
        [lon2d,lat2d] = getcoord2d(nc)
        var = "HGT"
        zeplot = getprefix(nc) + "domain"
    ###
    # Drop 5 boundary points on each side (relaxation zone).
    lon2d = dumpbdy(lon2d,5)
    lat2d = dumpbdy(lat2d,5)
    if proj == "npstere":             [wlon,wlat] = latinterv("North_Pole")
    elif proj in ["lcc","laea"]:      [wlon,wlat] = wrfinterv(lon2d,lat2d)
    else:                             [wlon,wlat] = simplinterv(lon2d,lat2d)
    ###
    m = define_proj(proj,wlon,wlat,back=back)
    x, y = m(lon2d, lat2d)
    ###
    what_I_plot = dumpbdy(nc.variables[var][0,:,:], 5)
    zelevels = 30  # let matplotlib choose 30 contour levels
    contourf(x, y, what_I_plot, zelevels)
    ###
    # Save next to the input file unless an explicit target folder is given.
    if not target:   zeplot = namefile[0:find(namefile,'wrfout')] + zeplot
    else:            zeplot = target + "/" + zeplot
    ###
    pad_inches_value = 0.35
    makeplotres(zeplot,res=100.,pad_inches_value=pad_inches_value)  ## a miniature
    makeplotres(zeplot,res=200.,pad_inches_value=pad_inches_value,disp=False)
Exemple #9
0
def main():
    """Spot-check the output of Mothur's ``pcr.seqs`` command.

    Draws a random sample of reads from the post-pcr.seqs fasta, joins each
    read's before/after sequences with the primer Mothur identified for it,
    trims the primers off the "before" sequence, and writes the comparison
    table to a tab-separated file.
    """

    parser = argparse.ArgumentParser(
        description='Check the output of Mothur pcr.seqs command.')
    parser.add_argument('-s', '--size', metavar = '', required = True, \
                        help = 'Specify sample size')
    parser.add_argument('-b', '--before', metavar = '', required = True, \
                        help = 'Specify fasta file that is input to pcr.seqs')
    parser.add_argument('-a', '--after', metavar = '', required = True, \
                        help = 'Specify fasta file output from pcr.seqs')
    parser.add_argument('-l', '--oligos', metavar = '', required = True, \
                        help = 'Specify oligos file')
    parser.add_argument('-g', '--group', metavar = '', required = True, \
                        help = 'Specify group file')
    parser.add_argument('-o', '--outfile', metavar = '', required = True, \
                        help = 'Specify path and name for output file')

    args = parser.parse_args()

    # Generate a random sample of integers.
    # Range of the random sample equals the number of reads in the pcr.seqs output fasta file
    # NOTE(review): range() starts at 0 but linecache.getline is 1-based, so a
    # sampled 0 returns an empty line — confirm this off-by-one is intended.
    random_sample = random.sample(range(round(file_len(args.after) / 2)),
                                  int(args.size))

    # We need to get the reads based on our random sample.
    # Then we need to get the sequence info for these IDs (sequence after pcr.seqs and before it)
    # Then we need to merge this info with the primer sequence info, and print this table

    d = {}  # Dictionary to hold random sample of read IDs and their sequences
    for i in random_sample:
        line = linecache.getline(
            args.after, i)  # Get the line corresponding to the random integer
        # If line is a READ ID, make that the key. If not, make the next line the key.
        if line.startswith('>'):
            key = line.rstrip().split()[0][1:]
            key = key.split('|')[0]
            seq = linecache.getline(args.after, i + 1)
        else:
            key = linecache.getline(args.after, i + 1).split()[0][1:]
            key = key.split('|')[0]
            seq = linecache.getline(args.after, i + 2)
        d[key] = seq.strip(
        )  # Key =  read ID, value = sequence after pcr.seqs cmd was run

    # For the read IDs in dict, get the sequence before pcr.seqs command was run
    with open(args.before, 'r') as f:
        for line in f:
            if line.startswith('>'):
                if line.strip().split()[0][1:] in d.keys():
                    append_value(d,
                                 line.strip().split()[0][1:],
                                 next(f).strip())

    # For the read IDs in dict, get the primer that Mothur has identified in them
    with open(args.group, 'r') as f:
        for line in f:
            if line.strip().split()[0] in d.keys():
                primer = line.strip().split()[1]
                append_value(d, line.strip().split()[0], primer.split('.')[1])

    o = {}  # Dictionary to hold oligos file info (primer seqs)
    with open(args.oligos, 'r') as f:
        for line in f:
            if line.startswith('primer'):
                key = line.strip().split()[3]  # primer name
                fwd = line.strip().split()[1]  # forward primer seq
                rev = line.strip().split()[2]  # reverse primer seq
                rev_seq = Seq(rev)
                rev_compl = str(rev_seq.reverse_complement()
                                )  # Reverse complement of rev primer
                fwd_seq = Seq(fwd)
                fwd_compl = str(fwd_seq.reverse_complement()
                                )  # Reverse complement of fwd primer
                o[key] = fwd  # Key is primer name, 1st value is forward primer seq
                append_value(o, key,
                             fwd_compl)  # 2nd value: rev compl fwd primer
                append_value(o, key, rev)  # 3rd value: reverse primer
                append_value(o, key,
                             rev_compl)  # 4th value: rev compl rev primer

    # Tabulate the sampled reads and the primer sequences.
    reads = pd.DataFrame([(k, *v) for k, v in d.items()])
    reads.columns = ('id', 'after', 'before', 'primer_name')

    primers = pd.DataFrame([(k, *v) for k, v in o.items()])
    primers.columns = ('primer_name', 'fwd', 'fwd_rc', 'rev', 'rev_rc')

    # Merge the primer sequence into the sample of read IDs
    full = pd.merge(reads, primers, on='primer_name', how='left')

    # Get the fragment with primers trimmed off
    a = full.fwd.values.astype(str)
    b = full.before.values.astype(str)
    full = full.assign(start_fwd=find(b, a))  # Start of fwd primer sequence
    full['fwd_len'] = full['fwd'].str.len(
    )  # Length of primer seq [start the fragment at start+len]
    full['start'] = full['start_fwd'] + full[
        'fwd_len']  # This is where the primer-less fragment starts
    a = full.rev.values.astype(str)
    full = full.assign(end=find(
        b, a))  # Start of rev_compl primer sequence [end fragment 'up to' end]
    full['segment'] = full.apply(
        lambda x: x[2][x[10]:x[11]], axis=1
    )  # x[2] is the 'before' seq, x[10] is index after fwd primer ends, x[11] is index where rev primer begins
    full.loc[
        full['start_fwd'] == -1,
        'segment'] = "Forward primer not found"  # Indicate if the forward primer not found

    new_full = full[[
        'id', 'before', 'fwd', 'rev_rc', 'after', 'segment', 'primer_name',
        'fwd_rc', 'rev', 'start_fwd', 'start', 'end'
    ]]

    new_full.to_csv(args.outfile, header=True, index=False, sep='\t')
Exemple #10
0
    def _construct_features_array(self,soi):
        """
        Construct the hand-crafted feature matrix for a batch of sequences.

        Assumes ``self.bp_indexes`` has already been computed (unlike the
        sibling implementation, this variant does not call labranchor).

        :return: numpy array of shape (len(soi), n_features) for running the
            model.
        """

        shape = ( len(soi), len(self.features_metadata) )
        batch_encoded_features = np.zeros( shape )

        # first feature is the gc content in acceptor region (double acceptor window at the end)
        acceptors = [ x[ 2*self.acc_i :] for x in soi ]
        batch_encoded_features[:, 0] = np.array( [ self._count_gc_content(acceptor) for acceptor in acceptors ] )
        # second feature is gc content in intron region
        introns = [ x[ self.don_i : self.acc_i ] for x in soi ]
        batch_encoded_features[:, 1] = np.array( [ self._count_gc_content(intron) for intron in introns ] )
        # slice out a window around each precomputed branch point (15 up, 6 down)
        seqB = np.array([ soi[j][int(self.bp_indexes[j]) - 15: int(self.bp_indexes[j]) + 6] for j in range(len(soi))])
        B_i = 15  # branch-point position inside the seqB window

        # fill out the rest of the features (base-by-region features)
        for i in range(2, len(self.features_metadata)):
            # parse the current feature info
            (region, pos, nucl) = self.features_metadata[i]
            if (region == 'seqD' or region == 'seqA'): #decrement, since acc_i/don_i is pos = 1
                if pos > 0:
                    pos -= 1
                #apply vectorized numpy operations
                if region == 'seqD':
                    idx = self.don_i + int(pos)
                else:
                    idx = self.acc_i + int(pos)
                # find() over the single slot [idx, idx+1): idx on match, -1 on miss
                feat_column = npc.find(soi, nucl, idx, idx + 1)
            else:
                idx = B_i + int(pos)
                feat_column = npc.find(seqB, nucl, idx, idx + 1)

            # Binarise: match index > 1 -> 1, miss (-1) -> 0.
            # NOTE(review): a match exactly at string position 0 yields 0 and
            # is encoded as "absent" — confirm idx can never be 0 here.
            feat_column[feat_column>1] = 1
            feat_column[feat_column == -1] = 0
            batch_encoded_features[:, i] = feat_column

        return batch_encoded_features
Exemple #11
0
 def match(x, pat):
     return np.any((npchar.find(x, pat) + 1).astype(bool), axis=1)
Exemple #12
0
 def match(x, pat):
     return np.any((npchar.find(x, pat) + 1).astype(bool), axis=1)
Exemple #13
0
def get_question_type_stat(keyword, questions, f1_em):
    """Print the mean F1/EM scores and the share of questions whose text
    contains *keyword*.  (Python 2 module: bare ``print`` statements.)
    """
    # find() >= 0 keeps any question containing the keyword (position 0 counts).
    # NOTE(review): s[:] is a redundant full copy of s.
    questions_ind = get_samples_with_conditions(questions, lambda s : find(s[:], keyword) >= 0)
    f1_em_type = [f1_em[i] for i in questions_ind]
    print np.mean(f1_em_type, axis=0)
    print (1.0 * len(questions_ind)) / len(questions)
Exemple #14
0
def winds (namefile,\
           nvert,\
           proj=None,\
           back=None,\
           target=None,
           stride=3,\
           numplot=2,\
           var=None,\
           colorb="def",\
           winds=True,\
           addchar=None,\
           interv=[0,1],\
           vmin=None,\
           vmax=None,\
           tile=False,\
           zoom=None,\
           display=True,\
           itstep=None,\
           hole=False,\
           save="gui",\
           anomaly=False,\
           var2=None,\
           ndiv=10,\
           first=1,\
           mult=1.,\
           zetitle="fill"):
    """Plot wind vectors and/or a scalar field from a WRF/GCM netCDF file
    at vertical level ``nvert``, looping over time steps and saving (or
    displaying) the resulting figure.  Returns the plot basename ``zeplot``.

    (Python 2 module: bare ``print`` statements near the end.)
    NOTE(review): ``interv`` uses a mutable list default; it is only read
    here, so sharing across calls is harmless — but worth confirming.
    """

    ####################################################################################################################
    ### Colorbars http://www.scipy.org/Cookbook/Matplotlib/Show_colormaps?action=AttachFile&do=get&target=colormaps3.png

    #################################
    ### Load librairies and functions
    from netCDF4 import Dataset
    from myplot import getcoord2d,define_proj,makeplotres,simplinterv,vectorfield,ptitle,latinterv,getproj,wrfinterv,dumpbdy,\
                       fmtvar,definecolorvec,defcolorb,getprefix,putpoints,calculate_bounds,errormess,definesubplot,\
                       zoomset,getcoorddef,getwinddef,whatkindfile,reducefield,bounds,getstralt,getfield,smooth,nolow,\
                       getname,localtime,polarinterv
    from mymath import deg,max,min,mean
    from matplotlib.pyplot import contour,contourf, subplot, figure, rcParams, savefig, colorbar, pcolor, show
    from matplotlib.cm import get_cmap
    import numpy as np
    from numpy.core.defchararray import find

    ######################
    ### Load NETCDF object
    nc  = Dataset(namefile)

    ##################################
    ### Initial checks and definitions
    typefile = whatkindfile(nc)                                  ## TYPEFILE
    if var not in nc.variables: var = False                      ## VAR
    if winds:                                                    ## WINDS
        [uchar,vchar,metwind] = getwinddef(nc)
        if uchar == 'not found': winds = False
    if not var and not winds: errormess("please set at least winds or var",printvar=nc.variables)
    [lon2d,lat2d] = getcoorddef(nc)                              ## COORDINATES, could be moved below
    if proj == None:   proj = getproj(nc)                        ## PROJECTION

    ##########################
    ### Define plot boundaries
    ### todo: possible areas in latinterv in argument (ex: "Far_South_Pole")
    if proj in ["npstere","spstere"]: [wlon,wlat] = polarinterv(lon2d,lat2d)
    elif proj in ["lcc","laea"]:      [wlon,wlat] = wrfinterv(lon2d,lat2d)
    else:                             [wlon,wlat] = simplinterv(lon2d,lat2d)
    if zoom:                          [wlon,wlat] = zoomset(wlon,wlat,zoom)

    #########################################
    ### Name for title and graphics save file
    basename = getname(var=var,winds=winds,anomaly=anomaly)
    basename = basename + getstralt(nc,nvert)  ## can be moved elsewhere for a more generic routine

    ##################################
    ### Open a figure and set subplots
    fig = figure()
    subv,subh = definesubplot( numplot, fig )

    #################################
    ### Time loop for plotting device
    found_lct = False
    nplot = 1
    itime = first
    error = False
    if itstep is None and numplot > 0: itstep = int(24./numplot)
    elif numplot <= 0:                 itstep = 1
    while error is False:

       ### Which local time ?
       ltst = localtime ( interv[0]+itime*interv[1], 0.5*(wlon[0]+wlon[1]) )

       ### General plot settings
       #print itime, int(ltst), numplot, nplot
       if numplot >= 1:
           if nplot > numplot: break
           if numplot > 1:
               if typefile not in ['geo']:  subplot(subv,subh,nplot)
           found_lct = True
       ### If only one local time is requested (numplot < 0)
       elif numplot <= 0:
           if int(ltst) + numplot != 0:
                 itime += 1
                 if found_lct is True: break     ## because it means LT was found at previous iteration
                 else:                 continue  ## continue to iterate to find the correct LT
           else:
                 found_lct = True

       ### Map projection
       m = define_proj(proj,wlon,wlat,back=back)
       x, y = m(lon2d, lat2d)

       #### Contour plot
       if var2:
           what_I_contour, error = reducefield( getfield(nc,var2), d4=itime, d3=nvert )
           if not error:
               if typefile in ['mesoapi','meso']:    what_I_contour = dumpbdy(what_I_contour,6)
               zevmin, zevmax = calculate_bounds(what_I_contour)
               zelevels = np.linspace(zevmin,zevmax,num=20)
               if var2 == 'HGT':  zelevels = np.arange(-10000.,30000.,2000.)
               contour( x, y, what_I_contour, zelevels, colors='k', linewidths = 0.33 ) #colors='w' )# , alpha=0.5)
           else:
               errormess("There is an error in reducing field !")

       #### Shaded plot
       if var:
           what_I_plot, error = reducefield( getfield(nc,var), d4=itime, d3=nvert )
           what_I_plot = what_I_plot*mult
           if not error:
               fvar = var
               ###
               if anomaly:
                   what_I_plot = 100. * ((what_I_plot / smooth(what_I_plot,12)) - 1.)
                   fvar = 'anomaly'
               #if mult != 1:
               #    fvar = str(mult) + "*" + var
               ###
               if typefile in ['mesoapi','meso']:    what_I_plot = dumpbdy(what_I_plot,6)
               zevmin, zevmax = calculate_bounds(what_I_plot,vmin=vmin,vmax=vmax)
               if colorb in ["def","nobar"]:   palette = get_cmap(name=defcolorb(fvar))
               else:                           palette = get_cmap(name=colorb)
               if not tile:
                   if not hole: what_I_plot = bounds(what_I_plot,zevmin,zevmax)
                   #zelevels = np.linspace(zevmin*(1. + 1.e-7),zevmax*(1. - 1.e-7)) #,num=20)
                   zelevels = np.linspace(zevmin,zevmax)
                   contourf( x, y, what_I_plot, zelevels, cmap = palette )
               else:
                   if hole:  what_I_plot = nolow(what_I_plot)
                   pcolor( x, y, what_I_plot, cmap = palette, \
                           vmin=zevmin, vmax=zevmax )
               if colorb != 'nobar' and var != 'HGT':
                         colorbar(fraction=0.05,pad=0.03,format=fmtvar(fvar),\
                                           ticks=np.linspace(zevmin,zevmax,ndiv+1),\
                                           extend='neither',spacing='proportional')
                                           # both min max neither
           else:
               errormess("There is an error in reducing field !")

       ### Vector plot
       if winds:
           vecx, error = reducefield( getfield(nc,uchar), d4=itime, d3=nvert )
           vecy, error = reducefield( getfield(nc,vchar), d4=itime, d3=nvert )
           if not error:
               if typefile in ['mesoapi','meso']:
                   [vecx,vecy] = [dumpbdy(vecx,6,stag=uchar), dumpbdy(vecy,6,stag=vchar)]
                   key = True
               elif typefile in ['gcm']:
                   key = False
               if metwind:  [vecx,vecy] = m.rotate_vector(vecx, vecy, lon2d, lat2d)
               if var == False:       colorvec = definecolorvec(back)
               else:                  colorvec = definecolorvec(colorb)
               vectorfield(vecx, vecy,\
                          x, y, stride=stride, csmooth=2,\
                          #scale=15., factor=300., color=colorvec, key=key)
                          scale=20., factor=250., color=colorvec, key=key)
                                            #200.         ## or csmooth=stride

       ### Next subplot
       plottitle = basename
       if typefile in ['mesoapi','meso']:
            if addchar:  plottitle = plottitle + addchar + "_LT"+str(ltst)
            else:        plottitle = plottitle + "_LT"+str(ltst)
       if mult != 1:           plottitle = str(mult) + "*" + plottitle
       if zetitle != "fill":   plottitle = zetitle
       ptitle( plottitle )
       itime += itstep
       nplot += 1

    ##########################################################################
    ### Save the figure in a file in the data folder or an user-defined folder
    if typefile in ['meso','mesoapi']:   prefix = getprefix(nc)
    elif typefile in ['gcm']:            prefix = 'LMD_GCM_'
    else:                                prefix = ''
    ###
    zeplot = prefix + basename
    if addchar:         zeplot = zeplot + addchar
    if numplot <= 0:    zeplot = zeplot + "_LT"+str(abs(numplot))
    ###
    if not target:      zeplot = namefile[0:find(namefile,'wrfout')] + zeplot
    else:               zeplot = target + "/" + zeplot
    ###
    if found_lct:
        pad_inches_value = 0.35
        if save == 'png':
            if display: makeplotres(zeplot,res=100.,pad_inches_value=pad_inches_value) #,erase=True)  ## a miniature
            makeplotres(zeplot,res=200.,pad_inches_value=pad_inches_value,disp=False)
        elif save in ['eps','svg','pdf']:
            makeplotres(zeplot,         pad_inches_value=pad_inches_value,disp=False,ext=save)
        elif save == 'gui':
            show()
        else:
            print "save mode not supported. using gui instead."
            show()
    else:   print "Local time not found"

    ###############
    ### Now the end
    return zeplot
Exemple #15
0
# Load the dataset and split the combined "POI/street" label column.
df = pd.read_csv(dataset, usecols=cols)

split_data = df["POI/street"].str.split("/", n=1, expand=True)
df["POI"] = split_data[0]
df["street"] = split_data[1]

# Sanity flags: is each entity literally a substring of the raw address?
df['POI_in'] = df.apply(lambda x: x["POI"] in x["raw_address"], axis=1)
df['street_in'] = df.apply(lambda x: x["street"] in x["raw_address"], axis=1)

# Locate the street entity inside raw_address using three patterns of
# decreasing strictness.
# Pattern 1: " street " (whole word); the +1 skips the leading pad space.
df["mock_street"] = " " + df["street"] + " "
a = df.mock_street.values.astype(str)
b = df.raw_address.values.astype(str)
df["start_street"] = find(b, a) + 1

# Pattern 2: " street," (street directly followed by a comma).
df["mock2_street"] = " " + df["street"] + ","
a = df.mock2_street.values.astype(str)
df["start2_street"] = find(b, a) + 1

# Pattern 3: bare substring match (no +1, so -1 still marks a miss).
a = df.street.values.astype(str)
df["cur_start_street"] = find(b, a)  # + 1

# Fall through: 0 means the previous pattern missed (e.g. "kembangan utara b,").
df.loc[df['start_street'] == 0, 'start_street'] = df["start2_street"]
df.loc[df['start_street'] == 0, 'start_street'] = df["cur_start_street"]
# End position = start + street length.
df["end_street"] = df["start_street"] + df["street"].str.len()


def gen_entity_pos(col_name):
Exemple #16
0
 def recipes_containing(self, sub: str):
     return defchararray.find(self.rec_names, sub) != -1