def pca(self, data_matrix):
    """Perform PCA.

    Principal components are given in self.pca,
    and the variance in self.variance.

    Parameters
    ----------
    data_matrix : list of lists
        List of tetranucleotide signatures
    """
    cols = len(data_matrix[0])
    data_matrix = np_reshape(np_array(data_matrix), (len(data_matrix), cols))

    pca = PCA()
    pc, variance = pca.pca_matrix(data_matrix, 3, bCenter=True, bScale=False)

    # ensure pc matrix has at least 3 dimensions
    if pc.shape[1] == 1:
        pc = np_append(pc, np_zeros((pc.shape[0], 2)), 1)
        variance = np_append(variance[0], np_ones(2))
    elif pc.shape[1] == 2:
        pc = np_append(pc, np_zeros((pc.shape[0], 1)), 1)
        variance = np_append(variance[0:2], np_ones(1))

    return pc, variance
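A minimal, self-contained sketch (plain NumPy only, not the PCA class above) of the zero-padding branch: np.append with axis=1 adds columns, so a (n, 1) or (n, 2) PC matrix is widened to (n, 3) and the variance vector is padded to length 3.

from numpy import append as np_append, zeros as np_zeros, ones as np_ones, array as np_array

pc = np_array([[0.1], [0.2], [0.3]])    # pretend PCA returned a single component
variance = np_array([1.0])
if pc.shape[1] == 1:
    pc = np_append(pc, np_zeros((pc.shape[0], 2)), 1)  # now shape (3, 3)
    variance = np_append(variance[0], np_ones(2))      # now length 3
print(pc.shape, variance)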
def unbalanced_loads(all_weight):
    """Vehicle unbalanced-load statistics.

    :param all_weight:
    :return:
    """
    unbalanced_loads_coe = float(conf.unbalanced_loads_coe())  # unbalanced-load coefficient (for an empty car)

    # left/right wheel-weight totals for each carriage
    wheel_weight = all_weight[0]
    wheel_weight_arr = wheel_weight.reshape((-1, 4, 2))
    wheel_weight_arr_trans = wheel_weight_arr.transpose((1, 0, 2))
    wheel_weight_sum = sum(wheel_weight_arr_trans)

    # left/right axle-weight totals for each carriage
    axle_weight = all_weight[1]
    axle_weight_arr = axle_weight.reshape((-1, 4))
    axle_weight_arr_trans = axle_weight_arr.transpose((1, 0))
    axle_weight_arr_trans_sum = sum(axle_weight_arr_trans)
    axle_weight_arr_trans_half = axle_weight_arr_trans_sum / 2
    axle_weight_new_sum = np_append(axle_weight_arr_trans_half, axle_weight_arr_trans_half)
    axle_weight_sum = around(axle_weight_new_sum.reshape((2, -1)).transpose((1, 0)), 4)

    # left/right bogie-weight totals for each carriage
    bogie_weight = all_weight[2]
    bogie_weight_arr = bogie_weight.reshape((-1, 2)).transpose((1, 0))
    bogie_weight_arr_sum = sum(bogie_weight_arr)
    bogie_weight_arr_half = bogie_weight_arr_sum / 2
    bogie_weight_new_sum = np_append(bogie_weight_arr_half, bogie_weight_arr_half)
    bogie_weight_sum = around(bogie_weight_new_sum.reshape((2, -1)).transpose((1, 0)), 4)

    # left/right carriage-body weight totals for each carriage
    carriage_weight = all_weight[3]
    carriage_weight_arr_half = carriage_weight / 2
    carriage_weight_new_sum = np_append(carriage_weight_arr_half, carriage_weight_arr_half)
    carriage_weight_sum = around(carriage_weight_new_sum.reshape((2, -1)).transpose((1, 0)), 4)

    # left/right weight totals for the whole train
    total_car_weight = wheel_weight_sum + axle_weight_sum + bogie_weight_sum + carriage_weight_sum

    # unbalanced-load statistics for each carriage
    diff_set = []
    is_unbalanced_loads = []
    for each_carriage_weight in total_car_weight:
        left_carriage_weight = each_carriage_weight[0]
        right_carriage_weight = each_carriage_weight[1]
        diff = round(abs(left_carriage_weight - right_carriage_weight), 4)
        mean_each_carriage_weight = round((left_carriage_weight + right_carriage_weight) / 2, 4)
        each_carriage_coe = unbalanced_loads_coe * sum(each_carriage_weight) / 38
        diff_set.append(diff)
        if mean_each_carriage_weight != 0:
            if diff > each_carriage_coe:  # unbalanced load: left/right weight difference exceeds the threshold
                is_unbalanced_loads.append(1)  # 1 means unbalanced
            else:
                is_unbalanced_loads.append(0)  # 0 means balanced
        else:
            is_unbalanced_loads.append(0)
    return is_unbalanced_loads
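A hedged sketch of the pairing idiom used repeatedly above: np_append concatenates two 1-D halves, reshape((2, -1)) stacks them as two rows, and transpose((1, 0)) yields one (left, right) pair per carriage. The example values are made up.

from numpy import append as np_append, array, around

left_half = array([10.1234, 11.5678])
right_half = array([10.2222, 11.3333])
paired = around(np_append(left_half, right_half).reshape((2, -1)).transpose((1, 0)), 4)
print(paired)   # [[10.1234 10.2222]
                #  [11.5678 11.3333]]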
def _rqa_lv_dist_min(self, th, m_min, freq_dist, ret_sum=0):
    l = np_array([])
    p = np_array([])
    for item in freq_dist(th).items():
        if float(item[0]) >= m_min:
            if item[1]:
                l = np_append(l, item[0])
                p = np_append(p, item[1])
    if ret_sum:
        return p.sum()
    else:
        return (l, p)
def init_ols_balanced(X, y, nn, random_state):
    n_samples = y.shape[0]
    n_neurons = nn.get_number_last_hidden_neurons()
    partial_semantics = zeros((n_samples, n_neurons))
    for i, hidden_neuron in enumerate(nn.hidden_layers[-1]):
        partial_semantics[:, i] = hidden_neuron.get_semantics()

    for output_index, output_neuron in enumerate(nn.output_layer):
        output_y = y[:, output_index]
        output_y_class_1_indices = where(output_y == 1)[0]
        output_y_class_1_count = output_y_class_1_indices.shape[0]
        output_y_class_0_count = n_samples - output_y_class_1_count

        sample_weights = ones(n_samples)
        class_1_weight = output_y_class_0_count / output_y_class_1_count
        sample_weights[output_y_class_1_indices] = class_1_weight

        reg = LinearRegression().fit(partial_semantics, output_y, sample_weights)
        optimal_weights = np_append(reg.coef_.T, reg.intercept_)

        # Update connections with the learning step value:
        for i in range(n_neurons):
            output_neuron.input_connections[-n_neurons + i].weight = optimal_weights[i]
        output_neuron.increment_bias(optimal_weights[-1])
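A minimal sketch, on hypothetical data, of the OLS step above: scikit-learn's LinearRegression is fit with per-sample weights, and np_append flattens the coefficient vector and the intercept into one 1-D weight array whose last entry is the bias.

from numpy import append as np_append
from numpy.random import default_rng
from sklearn.linear_model import LinearRegression

rng = default_rng(0)
X = rng.normal(size=(100, 3))        # stand-in for the partial semantics matrix
y = rng.integers(0, 2, size=100)     # stand-in binary targets
w = rng.uniform(0.5, 2.0, size=100)  # stand-in sample weights
reg = LinearRegression().fit(X, y, w)
optimal_weights = np_append(reg.coef_.T, reg.intercept_)
print(optimal_weights.shape)         # (4,) -> 3 coefficients + 1 intercept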
def setUpClass(cls):
    # Mock uniform distance objects and outliers
    cls.length_dict = {}
    for i in range(4):
        label = "group" + str(i + 1)
        cls.length_dict[label] = data
    outliers = [300, 400, 600, 700]
    # Make ScrollSeq objects and add outliers
    cls.seq_dict = {}
    cls.len_list = []
    group_counter = 0
    for group_id, ((k, v), new_val) in enumerate(
            zip(
                cls.length_dict.items(),
                outliers,
            )):
        length_list = list(np_append(v, [new_val]))
        for val_id, length in enumerate(length_list):
            cls.len_list.append(int(length))
            seq_obj = MockSeq(
                obj_id="{}.{}".format(group_id, val_id),
                value=int(length),
            )
            try:
                cls.seq_dict[k].append(seq_obj)
            except KeyError:
                cls.seq_dict[k] = []
                cls.seq_dict[k].append(seq_obj)
    # Finally, set up single class instance
    cls.z_obj = Filter(cls.seq_dict, filter_method='zscore')
def ModMatInv(key, alpha_len, matrix_len):
    key_ident = np_append(key, identity(matrix_len), axis=1)
    key_det = round(det(key)) % alpha_len
    key_det_ext = ExtendedEuclidean(alpha_len, key_det)
    inverted_key = ModMatInit(key, key_ident, key_det_ext, alpha_len, matrix_len)
    return inverted_key
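A small sketch of the augmentation step: np_append with axis=1 places an identity matrix to the right of the key, giving the augmented matrix [K | I] that a modular elimination routine can work on. The key values are arbitrary example numbers.

from numpy import append as np_append, array, identity

key = array([[3., 3.], [2., 5.]])
key_ident = np_append(key, identity(2), axis=1)
print(key_ident)   # [[3. 3. 1. 0.]
                   #  [2. 5. 0. 1.]]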
def EncryptDecryptHill_German(text_ascii_30, key, matrix_len, alpha_len):
    total_len = ceil(len(text_ascii_30) / matrix_len)
    text_key = zeros((1, total_len * matrix_len - len(text_ascii_30)))
    text_key = np_append(text_ascii_30, text_key)
    text_key = text_key.reshape(matrix_len, total_len, order='F')
    enc_dec_matrix = dot(text_key.T, key) % alpha_len
    total_len = total_len * matrix_len
    enc_dec_matrix = enc_dec_matrix.T.reshape(1, total_len, order='F')
    return [int(i) for i in array(enc_dec_matrix)[0].tolist()]
def gen_feature(self, sent, pos, prev2_label, prev_label): """ get features vector from self.syn0 matrix get words index :param sent: :param pos: :param prev2_label: :param prev_label: :return: """ u, u_1, u_2, u1, u2, b_1, b_2, b1, b2 = self.gen_unigram_bigram( sent, pos) ngram_feature_list = [u, u_1, u_2, u1, u2 ] + [''.join(b) for b in [b_1, b_2, b1, b2]] if self.no_bigram_feature: ngram_feature_list = ngram_feature_list[:-4] if self.no_unigram_feature: ngram_feature_list = ngram_feature_list[5:] state_feature_list = [ self.su_prefix + self.state_varient[int(item[0])] + item[1] for item in zip([prev2_label, prev_label], [u_2, u_1]) ] if not self.no_sb_feature: state_feature_list.append( self.sb_prefix + self.state_varient[int(prev_label)] + ''.join(b_1)) # change the bigram state def if self.no_action_feature: feat_list = ngram_feature_list else: feat_list = ngram_feature_list + state_feature_list feature_index_list = map(self.word2index, feat_list) feature_vec = self.syn0[feature_index_list].ravel() if self.no_binary_action_feature: feat_vec = feature_vec else: feat_vec = np_append( feature_vec, asanyarray([ float(prev2_label), float(prev_label) ])) ########### !!!!! tmp block the previous state feature.. # print 'feature shape', feat_vec.shape # print 'feature_shape', feature_vec.shape # feat_vec=np_append(feature_vec, asanyarray([float(prev2_label), float(prev_label)])) ########### !!!!! tmp block the previous state feature.. # print 'feature shape', feat_vec.shape return feat_vec, feature_index_list
def naive_search_with_np(n):
    prime_numbers = np_array([2])
    i = prime_numbers[0]
    while len(prime_numbers) != n:
        i += 1
        for d in np_arange(2, i):
            i_is_prime = True
            if i % d == 0:
                i_is_prime = False
                break
        if i_is_prime:
            prime_numbers = np_append(prime_numbers, i)
    print('Bytes:', getsizeof(prime_numbers))
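numpy.append returns a brand-new array on every call, so growing prime_numbers element by element copies the whole array each time (quadratic cost). A sketch of the usual workaround: collect candidates in a Python list and convert to an array once at the end.

from numpy import array as np_array

def naive_search_with_list(n):
    primes = [2]
    i = 2
    while len(primes) != n:
        i += 1
        if all(i % d for d in range(2, i)):
            primes.append(i)
    return np_array(primes)

print(naive_search_with_list(5))   # [ 2  3  5  7 11]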
def updateAlgoData(): """ Update from raw data into FPs directly used by location.fixPosWLAN() from WppDB(wpp_clusterid, wpp_cfps). 1) Retrieve latest incremental rawdata(csv) from remote FTP server(hosted by FPP). 2) Decompress bzip2, import CSV into wpp_uprecsinfo with its ver_uprecs, Update ver_uprecs in wpp_uprecsver. 3) Incr clustering inserted rawdata for direct algo use. """ dbips = DB_OFFLINE for dbip in dbips: dbsvr = dbsvrs[dbip] wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype']) ver_wpp = wppdb.getRawdataVersion() # Sync rawdata into wpp_uprecsinfo from remote FTP server. print 'Probing rawdata version > [%s]' % ver_wpp vers_fpp,localbzs = syncFtpUprecs(FTPCFG, ver_wpp) if not vers_fpp: print 'Not found!'; continue else: print 'Found new vers: %s' % vers_fpp # Handle each bzip2 file. alerts = {'vers':[], 'details':''} tab_rd = 'wpp_uprecsinfo' for bzfile in localbzs: # Filter out the ver_uprecs info from the name of each bzip file. ver_bzfile = bzfile.split('_')[-1].split('.')[0] # Update ver_uprecs in wpp_uprecsver to ver_bzfile. wppdb.setRawdataVersion(ver_bzfile) print '%s\nUpdate ver_uprecs -> [%s]' % ('-'*40, ver_bzfile) # Decompress bzip2. sys.stdout.write('Decompress & append rawdata ... ') csvdat = csv.reader( BZ2File(bzfile) ) try: indat = np_array([ line for line in csvdat ]) except csv.Error, e: sys.exit('\n\nERROR: %s, line %d: %s!\n' % (bzfile, csvdat.line_num, e)) # Append ver_uprecs(auto-incr),area_ok(0),area_try(0) to raw 16-col fp. append_info = np_array([ [ver_bzfile,0,0] for i in xrange(len(indat)) ]) indat_withvers = np_append(indat, append_info, axis=1).tolist(); print 'Done' # Import csv into wpp_uprecsinfo. try: sys.stdout.write('Import rawdata: ') wppdb.insertMany(table_name=tab_rd, indat=indat_withvers, verb=True) except Exception, e: _lineno = sys._getframe().f_lineno _file = sys._getframe().f_code.co_filename alerts['details'] += '\n[ver:%s][%s:%s]: %s' % \ (ver_bzfile, _file, _lineno, str(e).replace('\n', ' ')) alerts['vers'].append(ver_bzfile) print 'ERROR: Insert Rawdata Failed!' continue
def gen_feature(self, sent, pos, prev2_label, prev_label): """ get features vector from self.syn0 matrix get words index :param sent: :param pos: :param prev2_label: :param prev_label: :return: """ u, u_1, u_2, u1, u2, b_1, b_2, b1, b2 = self.gen_unigram_bigram(sent, pos) ngram_feature_list = [u, u_1, u_2, u1, u2] + [''.join(b) for b in [b_1, b_2, b1, b2]] if self.no_bigram_feature: ngram_feature_list = ngram_feature_list[:-4] if self.no_unigram_feature: ngram_feature_list = ngram_feature_list[5:] state_feature_list = [self.su_prefix + self.state_varient[int(item[0])] + item[1] for item in zip([prev2_label, prev_label], [u_2, u_1])] if not self.no_sb_feature: state_feature_list.append( self.sb_prefix + self.state_varient[int(prev_label)] + ''.join(b_1)) # change the bigram state def if self.no_action_feature: feat_list = ngram_feature_list else: feat_list = ngram_feature_list + state_feature_list feature_index_list = map(self.word2index, feat_list) feature_vec = self.syn0[feature_index_list].ravel() if self.no_binary_action_feature: feat_vec = feature_vec else: feat_vec = np_append(feature_vec, asanyarray( [float(prev2_label), float(prev_label)])) ########### !!!!! tmp block the previous state feature.. # print 'feature shape', feat_vec.shape # print 'feature_shape', feature_vec.shape # feat_vec=np_append(feature_vec, asanyarray([float(prev2_label), float(prev_label)])) ########### !!!!! tmp block the previous state feature.. # print 'feature shape', feat_vec.shape return feat_vec, feature_index_list
def mutation_ols_ls_global_margin(X, y, nn, n_added_neurons, random_state):
    hidden_semantics = build_hidden_semantics(nn, n_added_neurons, y.shape[0])

    y_prob = nn.get_predictions().copy()
    softmax(y_prob)
    ce_loss = -xlogy(y, y_prob)
    m = ce_loss.max(axis=1)
    #===========================================================================
    # am = ce_loss.argmax(axis=1)
    #===========================================================================
    sample_weights = 1 + m

    #===========================================================================
    # cut_off = 75
    # cut_off = percentile(m, cut_off)
    # above = where(m > cut_off)[0]
    # above_count = above.shape[0]
    # below_or_equal_count = n_samples - above_count
    # #===========================================================================
    # # below_or_equal = where(m <= cut_off)[0]
    # # below_or_equal_count = below_or_equal.shape[0]
    # #===========================================================================
    # sample_weights = ones(n_samples)
    # sample_weights[above] = below_or_equal_count / above_count
    #===========================================================================

    for output_index, output_neuron in enumerate(nn.output_layer):
        output_delta_y = y[:, output_index] - nn.get_predictions()[:, output_index]

        reg = LinearRegression().fit(hidden_semantics, output_delta_y, sample_weights)
        optimal_weights = np_append(reg.coef_.T, reg.intercept_)
        #=======================================================================
        # print('\n\toptimal_weights [min, mean, max]: [%.5f, %.5f, %.5f]' % (optimal_weights.min(), optimal_weights.mean(), optimal_weights.max()))
        #=======================================================================

        # Update connections with the learning step value:
        for i in range(n_added_neurons):
            output_neuron.input_connections[-n_added_neurons + i].weight = optimal_weights[i]
        output_neuron.increment_bias(optimal_weights[-1])
def gen_feature(self, sent, pos, prev2_label, prev_label): c0, c1, c2, c3, c4, b1, b2, b3, b4 = self.gen_uni_gram_bigram( sent, pos) ngram_feature_list = [c0, c1, c2, c3, c4 ] + [''.join(b) for b in [b1, b2, b3, b4]] if self.no_bigram_feature: ngram_feature_list = ngram_feature_list[:-4] if self.no_unigram_feature: ngram_feature_list = ngram_feature_list[5:] state_feature_list = [ self.su_prefix + self.state_varient[int(item[0])] + item[1] for item in zip([prev2_label, prev_label], [c2, c1]) ] if not self.no_sb_feature: state_feature_list.append( self.sb_prefix + self.state_varient[int(prev_label)] + ''.join(b1)) #change the bigram state def if self.no_action_feature: feat_list = ngram_feature_list else: feat_list = ngram_feature_list + state_feature_list feature_index_list = map(self.word2index, feat_list) feature_vec = self.syn0[feature_index_list].ravel() if self.no_binary_action_feature: feat_vec = feature_vec else: feat_vec = np_append( feature_vec, asanyarray([ float(prev2_label), float(prev_label) ])) ########### !!!!! tmp block the previous state feature.. #print 'feature shape', feat_vec.shape #print 'feature_shape', feature_vec.shape #feat_vec=np_append(feature_vec, asanyarray([float(prev2_label), float(prev_label)])) ########### !!!!! tmp block the previous state feature.. #print 'feature shape', feat_vec.shape return feat_vec, feature_index_list
def wheel_weight_algorithm(ww_wheel_value):
    try:
        # axle_wheel_value = ww_wheel_value.transpose((1, 0))
        # Empirical value: 742.585 corresponds to 2.1 t ==> weight coefficient: 2.1 / 742.585
        # sum_left_right = 742.585
        # sum_left_right = 789.85
        sum_left_right = round(
            sum(sum(ww_wheel_value)) / len(ww_wheel_value), 12)  # only a fixed value guarantees the weight; otherwise it can only be calibrated against 2.1 t
        wheelset_standard_weight = standard_left_weight + standard_right_weight + standard_axle_weight
        weight_coefficient = wheelset_standard_weight / sum_left_right
        left_wheel_coefficient = standard_left_weight / (
            standard_left_weight + standard_axle_weight / 2)
        right_wheel_coefficient = standard_right_weight / (
            standard_right_weight + standard_axle_weight / 2)
        axle_coefficient = standard_axle_weight / wheelset_standard_weight

        wheel_axle_weight = around(ww_wheel_value * weight_coefficient, 4)
        wheel_axle_weight_tran = wheel_axle_weight.transpose((1, 0))
        # weight of each wheelset
        wheelset_weight = around(sum(wheel_axle_weight_tran), 4)
        # weight of each axle
        axle_weight = around(wheelset_weight * axle_coefficient, 3)
        # weight of each wheel
        left_wheel_weight = around(
            wheel_axle_weight_tran[0] * left_wheel_coefficient, 3)
        right_wheel_weight = around(
            wheel_axle_weight_tran[1] * right_wheel_coefficient, 3)
        wheel_weight = np_append(left_wheel_weight, right_wheel_weight).reshape(
            (2, -1)).transpose((1, 0))
        return [wheel_weight, axle_weight, wheelset_weight]
    except Exception as e:
        info(e)
def gen_feature(self,sent, pos, prev2_label, prev_label): c0, c1, c2, c3, c4, b1, b2, b3, b4 = self.gen_uni_gram_bigram(sent, pos) ngram_feature_list= [c0, c1, c2, c3, c4]+[''.join(b) for b in [b1,b2,b3,b4]] if self.no_bigram_feature: ngram_feature_list =ngram_feature_list[:-4] if self.no_unigram_feature: ngram_feature_list = ngram_feature_list[5:] state_feature_list=[self.su_prefix+self.state_varient[int(item[0])]+item[1] for item in zip([prev2_label, prev_label], [c2, c1])] if not self.no_sb_feature: state_feature_list.append(self.sb_prefix+self.state_varient[int(prev_label)]+''.join(b1)) #change the bigram state def if self.no_action_feature: feat_list = ngram_feature_list else: feat_list = ngram_feature_list+state_feature_list feature_index_list = map(self.word2index, feat_list) feature_vec = self.syn0[feature_index_list].ravel() if self.no_binary_action_feature: feat_vec = feature_vec else: feat_vec=np_append(feature_vec, asanyarray([float(prev2_label), float(prev_label)])) ########### !!!!! tmp block the previous state feature.. #print 'feature shape', feat_vec.shape #print 'feature_shape', feature_vec.shape #feat_vec=np_append(feature_vec, asanyarray([float(prev2_label), float(prev_label)])) ########### !!!!! tmp block the previous state feature.. #print 'feature shape', feat_vec.shape return feat_vec, feature_index_list
def load(satscene, calibrate=True, area_extent=None, read_basic_or_detailed='both', **kwargs): """Load MSG SEVIRI High Resolution Wind (HRW) data from hdf5 format. """ # Read config file content conf = ConfigParser() conf.read(os.path.join(CONFIG_PATH, satscene.fullname + ".cfg")) values = {"orbit": satscene.orbit, "satname": satscene.satname, "number": satscene.number, "instrument": satscene.instrument_name, "satellite": satscene.fullname } LOG.info("assume seviri-level5") print "... assume seviri-level5" satscene.add_to_history("hdf5 data read by mpop/nwcsaf_hrw_hdf.py") # end of scan time 4 min after start end_time = satscene.time_slot + datetime.timedelta(minutes=4) # area !!! satscene.area filename = os.path.join( satscene.time_slot.strftime(conf.get("seviri-level5", "dir", raw=True)), satscene.time_slot.strftime(conf.get("seviri-level5", "filename", raw=True)) % values ) # define classes before we search for files (in order to return empty class if no file is found) HRW_basic = HRW_class() HRW_basic.detailed = False HRW_basic.date = satscene.time_slot HRW_detailed = HRW_class() HRW_detailed.detailed = True HRW_detailed.date = satscene.time_slot print "... search for file: ", filename filenames=glob(str(filename)) if len(filenames) != 0: if len(filenames) > 1: print "*** Warning, more than 1 datafile found: ", filenames filename = filenames[0] print("... read data from %s" % str(filename)) # create an instant of the HRW_class m_per_s_to_knots = 1.944 ## limit channels to read #hrw_channels=['HRV'] # limit basic or detailed or both #read_basic_or_detailed='detailed' #read_basic_or_detailed='basic' with h5py.File(filename,'r') as hf: #print hf.attrs.keys() #print hf.attrs.values() region_name = hf.attrs['REGION_NAME'].replace("_", "") print "... read HRW data for region ", region_name LOG.info("... read HRW data for region "+region_name) sat_ID = GP_IDs[int(hf.attrs["GP_SC_ID"])] print "... derived from Meteosat ", sat_ID LOG.info("... derived from Meteosat "+sat_ID) # print('List of arrays in this file: \n', hf.keys()), len(hf.keys()) if len(hf.keys()) == 0: print "*** Warning, empty file ", filename print "" else: for key in hf.keys(): if key[4:9] == "BASIC": if 'read_basic_or_detailed' in locals(): if read_basic_or_detailed.lower() == "detailed": continue HRW_data = HRW_basic # shallow copy elif key[4:12] == "DETAILED": if 'read_basic_or_detailed' in locals(): if read_basic_or_detailed.lower() == "basic": continue HRW_data = HRW_detailed # shallow copy hrw_chn = dict_channel[key[len(key)-9:]] if 'hrw_channels' in locals(): if hrw_channels != None: if hrw_chn not in hrw_channels: print "... "+hrw_chn+" is not in hrw_channels", hrw_channels print " skip reading this channel" continue # read all data channel = hf.get(key) # print '... 
read wind vectors of channel ', channel.name, hrw_chn # print " i lon lat speed[kn] dir pressure" #for i in range(channel.len()): # print '%3d %10.7f %10.7f %7.2f %7.1f %8.1f' % (channel[i]['wind_id'], channel[i]['lon'], channel[i]['lat'], \ # channel[i]['wind_speed']*m_per_s_to_knots, \ # channel[i]['wind_direction'], channel[i]['pressure']) # create string array with channel names channel_chararray = np_empty(channel.len(), dtype='|S6') channel_chararray[:] = hrw_chn HRW_data.channel = np_append(HRW_data.channel , channel_chararray ) HRW_data.wind_id = np_append(HRW_data.wind_id , channel[:]['wind_id'] ) HRW_data.prev_wind_id = np_append(HRW_data.prev_wind_id , channel[:]['prev_wind_id'] ) HRW_data.segment_X = np_append(HRW_data.segment_X , channel[:]['segment_X'] ) HRW_data.segment_Y = np_append(HRW_data.segment_Y , channel[:]['segment_Y'] ) HRW_data.t_corr_method = np_append(HRW_data.t_corr_method , channel[:]['t_corr_method'] ) HRW_data.lon = np_append(HRW_data.lon , channel[:]['lon'] ) HRW_data.lat = np_append(HRW_data.lat , channel[:]['lat'] ) HRW_data.dlon = np_append(HRW_data.dlon , channel[:]['dlon'] ) HRW_data.dlat = np_append(HRW_data.dlat , channel[:]['dlat'] ) HRW_data.pressure = np_append(HRW_data.pressure , channel[:]['pressure'] ) HRW_data.wind_speed = np_append(HRW_data.wind_speed , channel[:]['wind_speed'] ) HRW_data.wind_direction = np_append(HRW_data.wind_direction , channel[:]['wind_direction'] ) HRW_data.temperature = np_append(HRW_data.temperature , channel[:]['temperature'] ) HRW_data.conf_nwp = np_append(HRW_data.conf_nwp , channel[:]['conf_nwp'] ) HRW_data.conf_no_nwp = np_append(HRW_data.conf_no_nwp , channel[:]['conf_no_nwp'] ) HRW_data.t_type = np_append(HRW_data.t_type , channel[:]['t_type'] ) HRW_data.t_level_method = np_append(HRW_data.t_level_method , channel[:]['t_level_method'] ) HRW_data.t_winds = np_append(HRW_data.t_winds , channel[:]['t_winds'] ) HRW_data.t_corr_test = np_append(HRW_data.t_corr_test , channel[:]['t_corr_test'] ) HRW_data.applied_QI = np_append(HRW_data.applied_QI , channel[:]['applied_QI'] ) HRW_data.NWP_wind_levels = np_append(HRW_data.NWP_wind_levels , channel[:]['NWP_wind_levels'] ) HRW_data.num_prev_winds = np_append(HRW_data.num_prev_winds , channel[:]['num_prev_winds'] ) HRW_data.orographic_index = np_append(HRW_data.orographic_index, channel[:]['orographic_index'] ) HRW_data.cloud_type = np_append(HRW_data.cloud_type , channel[:]['cloud_type'] ) HRW_data.wind_channel = np_append(HRW_data.wind_channel , channel[:]['wind_channel'] ) HRW_data.correlation = np_append(HRW_data.correlation , channel[:]['correlation'] ) HRW_data.pressure_error = np_append(HRW_data.pressure_error , channel[:]['pressure_error'] ) # sort according to wind_id inds = HRW_data.wind_id.argsort() HRW_data.subset(inds) # changes HRW_data itself # sorting without conversion to numpy arrays #[e for (wid,pwid) in sorted(zip(HRW_data.wind_id,HRW_data.prev_wind_id))] else: print "*** Error, no file found" print "" sat_ID = "no file" # but we continue the program in order to add an empty channel below ## filter data according to the given optional arguments #n1 = str(HRW_data.channel.size) #HRW_data = HRW_data.filter(**kwargs) #print " apply filters "+' ('+n1+'->'+str(HRW_data.channel.size)+')' chn_name="HRW" satscene[chn_name].HRW_basic = HRW_basic.filter(**kwargs) # returns new object (deepcopied and filtered) satscene[chn_name].HRW_detailed = HRW_detailed.filter(**kwargs) # returns new object (deepcopied and filtered) satscene[chn_name].info['units'] = 'm/s' 
satscene[chn_name].info['satname'] = 'meteosat' satscene[chn_name].info['satnumber'] = sat_ID satscene[chn_name].info['instrument_name'] = 'seviri' satscene[chn_name].info['time'] = satscene.time_slot satscene[chn_name].info['is_calibrated'] = True
def mutation_ols_ls_local_margin(X, y, nn, n_added_neurons, random_state): n_samples = y.shape[0] hidden_semantics = build_hidden_semantics(nn, n_added_neurons, n_samples) for output_index, output_neuron in enumerate(nn.output_layer): output_delta_y = y[:, output_index] - nn.get_predictions()[:, output_index] output_y = y[:, output_index] output_y_class_1_indices = where(output_y == 1)[0] #======================================================================= # output_y_class_1_count = output_y_class_1_indices.shape[0] #======================================================================= output_y_class_0_indices = where(output_y == 0)[0] #======================================================================= # output_y_class_0_count = output_y_class_0_indices.shape[0] #======================================================================= margin = 0.25 class_1_outliers_indices = where( output_delta_y[output_y_class_1_indices] < margin)[0] class_0_outliers_indices = where( output_delta_y[output_y_class_0_indices] > -margin)[0] #=============================================================== # outliers_count = class_1_outliers_indices.shape[0] + class_0_outliers_indices.shape[0] # inliers_count = n_samples - outliers_count #=============================================================== class_1_inliers_indices = where( output_delta_y[output_y_class_1_indices] >= margin)[0] class_0_inliers_indices = where( output_delta_y[output_y_class_0_indices] <= -margin)[0] inliers_count = class_1_inliers_indices.shape[ 0] + class_0_inliers_indices.shape[0] outliers_count = n_samples - inliers_count sample_weights = ones(n_samples) if inliers_count > 0 and outliers_count > 0: if outliers_count >= inliers_count: weight = outliers_count / inliers_count else: weight = inliers_count / outliers_count sample_weights[ output_y_class_1_indices[class_1_inliers_indices]] = weight sample_weights[ output_y_class_0_indices[class_0_inliers_indices]] = weight """ > 1 or < 0: delta of 0 """ class_1_outliers_indices = where( output_delta_y[output_y_class_1_indices] < 0)[0] class_0_outliers_indices = where( output_delta_y[output_y_class_0_indices] > 0)[0] output_delta_y[ output_y_class_1_indices[class_1_outliers_indices]] = 0 output_delta_y[ output_y_class_0_indices[class_0_outliers_indices]] = 0 #======================================================================= # else: # print("[Debug] Else") # print() #======================================================================= reg = LinearRegression().fit(hidden_semantics, output_delta_y, sample_weights) optimal_weights = np_append(reg.coef_.T, reg.intercept_) #======================================================================= # print('\n\toptimal_weights [min, mean, max]: [%.5f, %.5f, %.5f]' % (optimal_weights.min(), optimal_weights.mean(), optimal_weights.max())) #======================================================================= # Update connections with the learning step value: for i in range(n_added_neurons): output_neuron.input_connections[-n_added_neurons + i].weight = optimal_weights[i] output_neuron.increment_bias(optimal_weights[-1])
def updateAlgoData(): """ Update from raw data into FPs directly used by location.fixPosWLAN() from WppDB(wpp_clusterid, wpp_cfps). 1) Retrieve latest incremental rawdata(csv) from remote FTP server(hosted by FPP). 2) Decompress bzip2, import CSV into wpp_uprecsinfo with its ver_uprecs, Update ver_uprecs in wpp_uprecsver. 3) Incr clustering inserted rawdata for direct algo use. """ dbips = DB_OFFLINE for dbip in dbips: dbsvr = dbsvrs[dbip] wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype']) ver_wpp = wppdb.getRawdataVersion() # Sync rawdata into wpp_uprecsinfo from remote FTP server. print 'Probing rawdata version > [%s]' % ver_wpp vers_fpp, localbzs = syncFtpUprecs(FTPCFG, ver_wpp) if not vers_fpp: print 'Not found!' continue else: print 'Found new vers: %s' % vers_fpp # Handle each bzip2 file. alerts = {'vers': [], 'details': ''} tab_rd = 'wpp_uprecsinfo' for bzfile in localbzs: # Filter out the ver_uprecs info from the name of each bzip file. ver_bzfile = bzfile.split('_')[-1].split('.')[0] # Update ver_uprecs in wpp_uprecsver to ver_bzfile. wppdb.setRawdataVersion(ver_bzfile) print '%s\nUpdate ver_uprecs -> [%s]' % ('-' * 40, ver_bzfile) # Decompress bzip2. sys.stdout.write('Decompress & append rawdata ... ') csvdat = csv.reader(BZ2File(bzfile)) try: indat = np_array([line for line in csvdat]) except csv.Error, e: sys.exit('\n\nERROR: %s, line %d: %s!\n' % (bzfile, csvdat.line_num, e)) # Append ver_uprecs(auto-incr),area_ok(0),area_try(0) to raw 16-col fp. append_info = np_array([[ver_bzfile, 0, 0] for i in xrange(len(indat))]) indat_withvers = np_append(indat, append_info, axis=1).tolist() print 'Done' # Import csv into wpp_uprecsinfo. try: sys.stdout.write('Import rawdata: ') wppdb.insertMany(table_name=tab_rd, indat=indat_withvers, verb=True) except Exception, e: _lineno = sys._getframe().f_lineno _file = sys._getframe().f_code.co_filename alerts['details'] += '\n[ver:%s][%s:%s]: %s' % \ (ver_bzfile, _file, _lineno, str(e).replace('\n', ' ')) alerts['vers'].append(ver_bzfile) print 'ERROR: Insert Rawdata Failed!' continue # Incr clustering. # file described by fd_csv contains all *location enabled* rawdata from wpp_uprecsinfo. strWhere = 'WHERE lat!=0 and lon!=0 and ver_uprecs=%s' % ver_bzfile cols_ignored = 3 # 3 status cols to be ignored during clustering: ver_uprecs,area_ok,area_try. cols_select = ','.join(wppdb.tbl_field[tab_rd][:-cols_ignored]) sql = wppdb.sqls['SQL_SELECT'] % (cols_select, '%s %s' % (tab_rd, strWhere)) rdata_loc = wppdb.execute(sql=sql, fetch_one=False) if not rdata_loc: continue # NO FPs has location info. str_rdata_loc = '\n'.join( [','.join([str(col) for col in fp]) for fp in rdata_loc]) fd_csv = StringIO(str_rdata_loc) print 'FPs for Incr clustering selected & ready' n_inserts = doClusterIncr(fd_csv=fd_csv, wppdb=wppdb, verb=False) print 'AlgoData added: [%s] clusters, [%s] FPs' % ( n_inserts['n_newcids'], n_inserts['n_newfps'])
def run(self, scaffold_stats, num_clusters, num_components, K, no_coverage, no_pca, iterations, genome_file, output_dir): """Calculate statistics for genomes. Parameters ---------- scaffold_stats : ScaffoldStats Statistics for individual scaffolds. num_clusters : int Number of cluster to form. num_components : int Number of PCA components to consider. K : int K-mer size to use for calculating genomic signature. no_coverage : boolean Flag indicating if coverage information should be used during clustering. no_pca : boolean Flag indicating if PCA of genomic signature should be calculated. iterations : int Iterations of clustering to perform. genome_file : str Sequences being clustered. output_dir : str Directory to write results. """ # get GC and mean coverage for each scaffold in genome self.logger.info('') self.logger.info(' Determining mean coverage and genomic signatures.') signatures = GenomicSignature(K) genome_stats = [] signature_matrix = [] seqs = seq_io.read(genome_file) for seq_id, seq in seqs.iteritems(): stats = scaffold_stats.stats[seq_id] if not no_coverage: genome_stats.append((np_mean(stats.coverage))) else: genome_stats.append(()) if K == 0: pass elif K == 4: signature_matrix.append(stats.signature) else: sig = signatures.seq_signature(seq) total_kmers = sum(sig) for i in xrange(0, len(sig)): sig[i] = float(sig[i]) / total_kmers signature_matrix.append(sig) # calculate PCA of tetranucleotide signatures if K != 0: if not no_pca: self.logger.info(' Calculating PCA of genomic signatures.') pc, variance = self.pca(signature_matrix) self.logger.info(' First %d PCs capture %.1f%% of the variance.' % (num_components, sum(variance[0:num_components]) * 100)) for i, stats in enumerate(genome_stats): genome_stats[i] = np_append(stats, pc[i][0:num_components]) else: self.logger.info(' Using complete genomic signature.') for i, stats in enumerate(genome_stats): genome_stats[i] = np_append(stats, signature_matrix[i]) # whiten data if feature matrix contains coverage and genomic signature data if not no_coverage and K != 0: print ' Whitening data.' genome_stats = whiten(genome_stats) else: genome_stats = np_array(genome_stats) # cluster self.logger.info(' Partitioning genome into %d clusters.' % num_clusters) bError = True while bError: try: bError = False _centroids, labels = kmeans2(genome_stats, num_clusters, iterations, minit='points', missing='raise') except ClusterError: bError = True for k in range(num_clusters): self.logger.info(' Placed %d sequences in cluster %d.' % (sum(labels == k), (k + 1))) # write out clusters genome_id = remove_extension(genome_file) for k in range(num_clusters): fout = open(os.path.join(output_dir, genome_id + '_c%d' % (k + 1) + '.fna'), 'w') for i in np_where(labels == k)[0]: seq_id = seqs.keys()[i] fout.write('>' + seq_id + '\n') fout.write(seqs[seq_id] + '\n') fout.close()
def __add__(self, HRW_class2):
    HRW_new = HRW_class()
    HRW_new.date = self.date          # !!! does not make sense !!!
    HRW_new.detailed = self.detailed  # !!! does not make sense !!!
    HRW_new.channel = np_append(self.channel, HRW_class2.channel)
    HRW_new.wind_id = np_append(self.wind_id, HRW_class2.wind_id)
    HRW_new.prev_wind_id = np_append(self.prev_wind_id, HRW_class2.prev_wind_id)
    HRW_new.segment_X = np_append(self.segment_X, HRW_class2.segment_X)
    HRW_new.segment_Y = np_append(self.segment_Y, HRW_class2.segment_Y)
    HRW_new.t_corr_method = np_append(self.t_corr_method, HRW_class2.t_corr_method)
    HRW_new.lon = np_append(self.lon, HRW_class2.lon)
    HRW_new.lat = np_append(self.lat, HRW_class2.lat)
    HRW_new.dlon = np_append(self.dlon, HRW_class2.dlon)
    HRW_new.dlat = np_append(self.dlat, HRW_class2.dlat)
    HRW_new.pressure = np_append(self.pressure, HRW_class2.pressure)
    HRW_new.wind_speed = np_append(self.wind_speed, HRW_class2.wind_speed)
    HRW_new.wind_direction = np_append(self.wind_direction, HRW_class2.wind_direction)
    HRW_new.temperature = np_append(self.temperature, HRW_class2.temperature)
    HRW_new.conf_nwp = np_append(self.conf_nwp, HRW_class2.conf_nwp)
    HRW_new.conf_no_nwp = np_append(self.conf_no_nwp, HRW_class2.conf_no_nwp)
    HRW_new.t_type = np_append(self.t_type, HRW_class2.t_type)
    HRW_new.t_level_method = np_append(self.t_level_method, HRW_class2.t_level_method)
    HRW_new.t_winds = np_append(self.t_winds, HRW_class2.t_winds)
    HRW_new.t_corr_test = np_append(self.t_corr_test, HRW_class2.t_corr_test)
    HRW_new.applied_QI = np_append(self.applied_QI, HRW_class2.applied_QI)
    HRW_new.NWP_wind_levels = np_append(self.NWP_wind_levels, HRW_class2.NWP_wind_levels)
    HRW_new.num_prev_winds = np_append(self.num_prev_winds, HRW_class2.num_prev_winds)
    HRW_new.orographic_index = np_append(self.orographic_index, HRW_class2.orographic_index)
    HRW_new.cloud_type = np_append(self.cloud_type, HRW_class2.cloud_type)
    HRW_new.wind_channel = np_append(self.wind_channel, HRW_class2.wind_channel)
    HRW_new.correlation = np_append(self.correlation, HRW_class2.correlation)
    HRW_new.pressure_error = np_append(self.pressure_error, HRW_class2.pressure_error)
    return HRW_new
def kmeans(self, scaffold_stats, num_clusters, num_components, K, no_coverage, no_pca, iterations, genome_file, output_dir): """Cluster genome with k-means. Parameters ---------- scaffold_stats : ScaffoldStats Statistics for individual scaffolds. num_clusters : int Number of cluster to form. num_components : int Number of PCA components to consider. K : int K-mer size to use for calculating genomic signature no_coverage : boolean Flag indicating if coverage information should be used during clustering. no_pca : boolean Flag indicating if PCA of genomic signature should be calculated. iterations: int iterations to perform during clustering genome_file : str Sequences being clustered. output_dir : str Directory to write results. """ # get GC and mean coverage for each scaffold in genome self.logger.info('Determining mean coverage and genomic signatures.') signatures = GenomicSignature(K) genome_stats = [] signature_matrix = [] seqs = seq_io.read(genome_file) for seq_id, seq in seqs.items(): stats = scaffold_stats.stats[seq_id] if not no_coverage: genome_stats.append((np_mean(stats.coverage))) else: genome_stats.append(()) if K == 0: pass elif K == 4: signature_matrix.append(stats.signature) else: sig = signatures.seq_signature(seq) total_kmers = sum(sig) for i in range(0, len(sig)): sig[i] = float(sig[i]) / total_kmers signature_matrix.append(sig) # calculate PCA of signatures if K != 0: if not no_pca: self.logger.info('Calculating PCA of genomic signatures.') pc, variance = self.pca(signature_matrix) self.logger.info( 'First {:,} PCs capture {:.1f}% of the variance.'.format( num_components, sum(variance[0:num_components]) * 100)) for i, stats in enumerate(genome_stats): genome_stats[i] = np_append(stats, pc[i][0:num_components]) else: self.logger.info('Using complete genomic signature.') for i, stats in enumerate(genome_stats): genome_stats[i] = np_append(stats, signature_matrix[i]) # whiten data if feature matrix contains coverage and genomic signature data if not no_coverage and K != 0: self.logger.info('Whitening data.') genome_stats = whiten(genome_stats) else: genome_stats = np_array(genome_stats) # cluster self.logger.info( 'Partitioning genome into {:,} clusters.'.format(num_clusters)) bError = True while bError: try: bError = False _centroids, labels = kmeans2(genome_stats, num_clusters, iterations, minit='points', missing='raise') except ClusterError: bError = True for k in range(num_clusters): self.logger.info('Placed {:,} sequences in cluster {:,}.'.format( sum(labels == k), (k + 1))) # write out clusters genome_id = remove_extension(genome_file) for k in range(num_clusters): fout = open( os.path.join(output_dir, genome_id + '_c%d' % (k + 1) + '.fna'), 'w') for i in np_where(labels == k)[0]: seq_id = seqs.keys()[i] fout.write('>' + seq_id + '\n') fout.write(seqs[seq_id] + '\n') fout.close()
def predict_sigle_position(self, sent, pos, prev2_label, prev_label): flag = False feature_vec, feature_index_list = self.gen_feature( sent, pos, prev2_label, prev_label) if self.train_mode and self.drop_out: to_block = set( permutation(arange(self.non_fixed_param))[:self.dropout_size]) #print 'to_block',list(to_block)[:10] block = array( [0 if zzz in to_block else 1 for zzz in range(self.pred_size)]) feature_vec = multiply(feature_vec, block) elif self.drop_out: # for dropout mode at testing time... feature_vec = (1 - self.dropout_rate) * feature_vec block = None else: block = None if block: print 'block=', block if flag: print 'pos, char=', pos, sent[pos] print 'feat_index_list=', feature_index_list, ';features are:', ' '.join( [self.index2word[ind] for ind in feature_index_list]) c0 = sent[pos] if pos < len(sent) else self.END pred_tuple = tuple( [self.su_prefix + varient + c0 for varient in self.state_varient]) if pred_tuple[0] in self.vocab and pred_tuple[1] in self.vocab: pass else: pred_tuple = None if self.train_mode: print 'Unknown candidate! Should NOT happen during training!' assert False pred_tuple2 = tuple([self.label0_as_vocab, self.label1_as_vocab]) softmax_score = None if pred_tuple: pred_index_list = [self.vocab[pred].index for pred in pred_tuple] pred_matrix = self.syn1neg[pred_index_list] if block is not None: pred_matrix = multiply(block, pred_matrix) elif self.drop_out: pred_matrix = (1 - self.dropout_rate) * pred_matrix raw_score = exp(dot(feature_vec, pred_matrix.T)) softmax_score = raw_score / sum(raw_score) pred_index_list2 = [self.vocab[pred].index for pred in pred_tuple2] pred_matrix2 = self.syn1neg[pred_index_list2] if block is not None: pred_matrix2 = multiply(block, pred_matrix2) elif self.drop_out: pred_matrix = (1 - self.dropout_rate) * pred_matrix2 raw_score2 = exp(dot(feature_vec, pred_matrix2.T)) softmax_score2 = raw_score2 / sum(raw_score2) #print pred_matrix2.shape, pred_matrix.shape if pred_tuple: softmax_score2 = np_append(softmax_score2, softmax_score) pred_index_list2.extend(pred_index_list) pred_matrix2 = np_append(pred_matrix2, pred_matrix, axis=0) #print pred_matrix2.shape, pred_matrix.shape if flag: print 'pred index and item=', pred_index_list2, ' '.join( [self.index2word[ind] for ind in pred_index_list2]) return softmax_score2, feature_index_list, pred_index_list2, feature_vec, pred_matrix2
def renderTransCPData(self, fileName="", show=True, elev=45, azim=45, all=False, showAxis=False, primaryWidth=12, primarySpace=3, dpi=300, format='png', fig=None, highlight=None, restrictedBids=[], alpha=1, ignoreContigLengths=False): """Plot transformed data in 3D""" del_fig = False if (fig is None): fig = plt.figure() del_fig = True else: plt.clf() if (all): myAXINFO = { 'x': { 'i': 0, 'tickdir': 1, 'juggled': (1, 0, 2), 'color': (0, 0, 0, 0, 0) }, 'y': { 'i': 1, 'tickdir': 0, 'juggled': (0, 1, 2), 'color': (0, 0, 0, 0, 0) }, 'z': { 'i': 2, 'tickdir': 0, 'juggled': (0, 2, 1), 'color': (0, 0, 0, 0, 0) }, } ax = fig.add_subplot(131, projection='3d') sc = ax.scatter(self.transformedCP[:, 0], self.transformedCP[:, 1], self.transformedCP[:, 2], edgecolors='k', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args: None # disable depth transparency effect ax.azim = 0 ax.elev = 0 ax.set_xlim3d(0, self.scaleFactor) ax.set_ylim3d(0, self.scaleFactor) ax.set_zlim3d(0, self.scaleFactor) ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_zticklabels([]) ax.set_xticks([]) ax.set_yticks([]) ax.set_zticks([]) for axis in ax.w_xaxis, ax.w_yaxis, ax.w_zaxis: for elt in axis.get_ticklines() + axis.get_ticklabels(): elt.set_visible(False) ax.w_xaxis._AXINFO = myAXINFO ax.w_yaxis._AXINFO = myAXINFO ax.w_zaxis._AXINFO = myAXINFO ax = fig.add_subplot(132, projection='3d') sc = ax.scatter(self.transformedCP[:, 0], self.transformedCP[:, 1], self.transformedCP[:, 2], edgecolors='k', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args: None # disable depth transparency effect ax.azim = 90 ax.elev = 0 ax.set_xlim3d(0, self.scaleFactor) ax.set_ylim3d(0, self.scaleFactor) ax.set_zlim3d(0, self.scaleFactor) ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_zticklabels([]) ax.set_xticks([]) ax.set_yticks([]) ax.set_zticks([]) for axis in ax.w_xaxis, ax.w_yaxis, ax.w_zaxis: for elt in axis.get_ticklines() + axis.get_ticklabels(): elt.set_visible(False) ax.w_xaxis._AXINFO = myAXINFO ax.w_yaxis._AXINFO = myAXINFO ax.w_zaxis._AXINFO = myAXINFO ax = fig.add_subplot(133, projection='3d') sc = ax.scatter(self.transformedCP[:, 0], self.transformedCP[:, 1], self.transformedCP[:, 2], edgecolors='k', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args: None # disable depth transparency effect ax.azim = 0 ax.elev = 90 ax.set_xlim3d(0, self.scaleFactor) ax.set_ylim3d(0, self.scaleFactor) ax.set_zlim3d(0, self.scaleFactor) ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_zticklabels([]) ax.set_xticks([]) ax.set_yticks([]) ax.set_zticks([]) for axis in ax.w_xaxis, ax.w_yaxis, ax.w_zaxis: for elt in axis.get_ticklines() + axis.get_ticklabels(): elt.set_visible(False) ax.w_xaxis._AXINFO = myAXINFO ax.w_yaxis._AXINFO = myAXINFO ax.w_zaxis._AXINFO = myAXINFO else: ax = fig.add_subplot(111, projection='3d') if len(restrictedBids) == 0: if highlight is None: print("BF:", np_shape(self.transformedCP)) if ignoreContigLengths: sc = ax.scatter(self.transformedCP[:, 0], self.transformedCP[:, 1], self.transformedCP[:, 2], edgecolors='none', c=self.contigGCs, cmap=self.colorMapGC, s=10., vmin=0.0, vmax=1.0, marker='.') else: sc = ax.scatter(self.transformedCP[:, 0], self.transformedCP[:, 1], self.transformedCP[:, 2], edgecolors='none', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, 
s=np_sqrt(self.contigLengths), marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args: None # disable depth transparency effect else: #draw the opaque guys first """ sc = ax.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors='none', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=100., marker='s', alpha=alpha) sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect """ # now replot the highlighted guys disp_vals = np_array([]) disp_GCs = np_array([]) thrower = {} hide_vals = np_array([]) hide_GCs = np_array([]) num_points = 0 for bin in highlight: for row_index in bin.rowIndices: num_points += 1 disp_vals = np_append( disp_vals, self.transformedCP[row_index]) disp_GCs = np_append(disp_GCs, self.contigGCs[row_index]) thrower[row_index] = False # reshape disp_vals = np_reshape(disp_vals, (num_points, 3)) num_points = 0 for i in range(len(self.indices)): try: thrower[i] except KeyError: num_points += 1 hide_vals = np_append(hide_vals, self.transformedCP[i]) hide_GCs = np_append(hide_GCs, self.contigGCs[i]) # reshape hide_vals = np_reshape(hide_vals, (num_points, 3)) sc = ax.scatter(hide_vals[:, 0], hide_vals[:, 1], hide_vals[:, 2], edgecolors='none', c=hide_GCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=100., marker='s', alpha=alpha) sc.set_edgecolors = sc.set_facecolors = lambda *args: None # disable depth transparency effect sc = ax.scatter(disp_vals[:, 0], disp_vals[:, 1], disp_vals[:, 2], edgecolors='none', c=disp_GCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=10., marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args: None # disable depth transparency effect print(np_shape(disp_vals), np_shape(hide_vals), np_shape(self.transformedCP)) # render color bar cbar = plt.colorbar(sc, shrink=0.5) cbar.ax.tick_params() cbar.ax.set_title("% GC", size=10) cbar.set_ticks([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]) cbar.ax.set_ylim([0.15, 0.85]) mungeCbar(cbar) else: r_trans = np_array([]) r_cols = np_array([]) num_added = 0 for i in range(len(self.indices)): if self.binIds[i] not in restrictedBids: r_trans = np_append(r_trans, self.transformedCP[i]) r_cols = np_append(r_cols, self.contigGCs[i]) num_added += 1 r_trans = np_reshape(r_trans, (num_added, 3)) print(np_shape(r_trans)) #r_cols = np_reshape(r_cols, (num_added,3)) sc = ax.scatter(r_trans[:, 0], r_trans[:, 1], r_trans[:, 2], edgecolors='none', c=r_cols, cmap=self.colorMapGC, s=10., vmin=0.0, vmax=1.0, marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args: None # disable depth transparency effect # render color bar cbar = plt.colorbar(sc, shrink=0.5) cbar.ax.tick_params() cbar.ax.set_title("% GC", size=10) cbar.set_ticks([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]) cbar.ax.set_ylim([0.15, 0.85]) mungeCbar(cbar) ax.azim = azim ax.elev = elev ax.set_xlim3d(0, self.scaleFactor) ax.set_ylim3d(0, self.scaleFactor) ax.set_zlim3d(0, self.scaleFactor) ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_zticklabels([]) ax.set_xticks([]) ax.set_yticks([]) ax.set_zticks([]) if (not showAxis): ax.set_axis_off() if (fileName != ""): try: if (all): fig.set_size_inches(3 * primaryWidth + 2 * primarySpace, primaryWidth) else: fig.set_size_inches(primaryWidth, primaryWidth) plt.savefig(fileName, dpi=dpi, format=format) except: print("Error saving image", fileName, exc_info()[0]) raise elif (show): try: plt.show() except: print("Error showing image", exc_info()[0]) raise if del_fig: plt.close(fig) del fig
def renderTransCPData(self, fileName="", show=True, elev=45, azim=45, all=False, showAxis=False, primaryWidth=12, primarySpace=3, dpi=300, format='png', fig=None, highlight=None, restrictedBids=[], alpha=1, ignoreContigLengths=False): """Plot transformed data in 3D""" del_fig = False if(fig is None): fig = plt.figure() del_fig = True else: plt.clf() if(all): myAXINFO = { 'x': {'i': 0, 'tickdir': 1, 'juggled': (1, 0, 2), 'color': (0, 0, 0, 0, 0)}, 'y': {'i': 1, 'tickdir': 0, 'juggled': (0, 1, 2), 'color': (0, 0, 0, 0, 0)}, 'z': {'i': 2, 'tickdir': 0, 'juggled': (0, 2, 1), 'color': (0, 0, 0, 0, 0)}, } ax = fig.add_subplot(131, projection='3d') sc = ax.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors='k', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect ax.azim = 0 ax.elev = 0 ax.set_xlim3d(0,self.scaleFactor) ax.set_ylim3d(0,self.scaleFactor) ax.set_zlim3d(0,self.scaleFactor) ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_zticklabels([]) ax.set_xticks([]) ax.set_yticks([]) ax.set_zticks([]) for axis in ax.w_xaxis, ax.w_yaxis, ax.w_zaxis: for elt in axis.get_ticklines() + axis.get_ticklabels(): elt.set_visible(False) ax.w_xaxis._AXINFO = myAXINFO ax.w_yaxis._AXINFO = myAXINFO ax.w_zaxis._AXINFO = myAXINFO ax = fig.add_subplot(132, projection='3d') sc = ax.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors='k', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect ax.azim = 90 ax.elev = 0 ax.set_xlim3d(0,self.scaleFactor) ax.set_ylim3d(0,self.scaleFactor) ax.set_zlim3d(0,self.scaleFactor) ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_zticklabels([]) ax.set_xticks([]) ax.set_yticks([]) ax.set_zticks([]) for axis in ax.w_xaxis, ax.w_yaxis, ax.w_zaxis: for elt in axis.get_ticklines() + axis.get_ticklabels(): elt.set_visible(False) ax.w_xaxis._AXINFO = myAXINFO ax.w_yaxis._AXINFO = myAXINFO ax.w_zaxis._AXINFO = myAXINFO ax = fig.add_subplot(133, projection='3d') sc = ax.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors='k', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect ax.azim = 0 ax.elev = 90 ax.set_xlim3d(0,self.scaleFactor) ax.set_ylim3d(0,self.scaleFactor) ax.set_zlim3d(0,self.scaleFactor) ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_zticklabels([]) ax.set_xticks([]) ax.set_yticks([]) ax.set_zticks([]) for axis in ax.w_xaxis, ax.w_yaxis, ax.w_zaxis: for elt in axis.get_ticklines() + axis.get_ticklabels(): elt.set_visible(False) ax.w_xaxis._AXINFO = myAXINFO ax.w_yaxis._AXINFO = myAXINFO ax.w_zaxis._AXINFO = myAXINFO else: ax = fig.add_subplot(111, projection='3d') if len(restrictedBids) == 0: if highlight is None: print "BF:", np_shape(self.transformedCP) if ignoreContigLengths: sc = ax.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors='none', c=self.contigGCs, cmap=self.colorMapGC, s=10., vmin=0.0, vmax=1.0, marker='.') else: sc = ax.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors='none', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=np_sqrt(self.contigLengths), marker='.') sc.set_edgecolors = 
sc.set_facecolors = lambda *args:None # disable depth transparency effect else: #draw the opaque guys first """ sc = ax.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors='none', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=100., marker='s', alpha=alpha) sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect """ # now replot the highlighted guys disp_vals = np_array([]) disp_GCs = np_array([]) thrower = {} hide_vals = np_array([]) hide_GCs = np_array([]) num_points = 0 for bin in highlight: for row_index in bin.rowIndices: num_points += 1 disp_vals = np_append(disp_vals, self.transformedCP[row_index]) disp_GCs = np_append(disp_GCs, self.contigGCs[row_index]) thrower[row_index] = False # reshape disp_vals = np_reshape(disp_vals, (num_points, 3)) num_points = 0 for i in range(len(self.indices)): try: thrower[i] except KeyError: num_points += 1 hide_vals = np_append(hide_vals, self.transformedCP[i]) hide_GCs = np_append(hide_GCs, self.contigGCs[i]) # reshape hide_vals = np_reshape(hide_vals, (num_points, 3)) sc = ax.scatter(hide_vals[:,0], hide_vals[:,1], hide_vals[:,2], edgecolors='none', c=hide_GCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=100., marker='s', alpha=alpha) sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect sc = ax.scatter(disp_vals[:,0], disp_vals[:,1], disp_vals[:,2], edgecolors='none', c=disp_GCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=10., marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect print np_shape(disp_vals), np_shape(hide_vals), np_shape(self.transformedCP) # render color bar cbar = plt.colorbar(sc, shrink=0.5) cbar.ax.tick_params() cbar.ax.set_title("% GC", size=10) cbar.set_ticks([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]) cbar.ax.set_ylim([0.15, 0.85]) mungeCbar(cbar) else: r_trans = np_array([]) r_cols=np_array([]) num_added = 0 for i in range(len(self.indices)): if self.binIds[i] not in restrictedBids: r_trans = np_append(r_trans, self.transformedCP[i]) r_cols = np_append(r_cols, self.contigGCs[i]) num_added += 1 r_trans = np_reshape(r_trans, (num_added,3)) print np_shape(r_trans) #r_cols = np_reshape(r_cols, (num_added,3)) sc = ax.scatter(r_trans[:,0], r_trans[:,1], r_trans[:,2], edgecolors='none', c=r_cols, cmap=self.colorMapGC, s=10., vmin=0.0, vmax=1.0, marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect # render color bar cbar = plt.colorbar(sc, shrink=0.5) cbar.ax.tick_params() cbar.ax.set_title("% GC", size=10) cbar.set_ticks([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]) cbar.ax.set_ylim([0.15, 0.85]) mungeCbar(cbar) ax.azim = azim ax.elev = elev ax.set_xlim3d(0,self.scaleFactor) ax.set_ylim3d(0,self.scaleFactor) ax.set_zlim3d(0,self.scaleFactor) ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_zticklabels([]) ax.set_xticks([]) ax.set_yticks([]) ax.set_zticks([]) if(not showAxis): ax.set_axis_off() if(fileName != ""): try: if(all): fig.set_size_inches(3*primaryWidth+2*primarySpace,primaryWidth) else: fig.set_size_inches(primaryWidth,primaryWidth) plt.savefig(fileName,dpi=dpi,format=format) except: print "Error saving image",fileName, exc_info()[0] raise elif(show): try: plt.show() except: print "Error showing image", exc_info()[0] raise if del_fig: plt.close(fig) del fig
def plotRegion(self, px, py, pz, fileName="", tag="", column=False):
    """Plot the region surrounding a point"""
    import matplotlib as mpl
    disp_vals = np_array([])
    disp_cols = np_array([])
    num_points = 0

    # plot all points within span
    (z_lower, z_upper) = self.makeCoordRanges(pz, self.span)
    if column:
        z_lower = 0
        z_upper = self.PM.scaleFactor - 1
    (x_lower, x_upper) = self.makeCoordRanges(px, self.span)
    (y_lower, y_upper) = self.makeCoordRanges(py, self.span)
    for z in range(z_lower, z_upper):
        realz = self.PM.scaleFactor - z - 1
        for x in range(x_lower, x_upper):
            for y in range(y_lower, y_upper):
                if (x, y, realz) in self.im2RowIndicies:
                    for row_index in self.im2RowIndicies[(x, y, realz)]:
                        if (row_index not in self.PM.binnedRowIndicies and
                                row_index not in self.PM.restrictedRowIndicies):
                            num_points += 1
                            disp_vals = np_append(disp_vals, self.PM.transformedCP[row_index])
                            disp_cols = np_append(disp_cols, self.PM.contigColours[row_index])

    # make a black mark at the max values
    small_span = self.span / 2
    (x_lower, x_upper) = self.makeCoordRanges(px, small_span)
    (y_lower, y_upper) = self.makeCoordRanges(py, small_span)
    (z_lower, z_upper) = self.makeCoordRanges(pz, small_span)
    for z in range(z_lower, z_upper):
        realz = self.PM.scaleFactor - z - 1
        for x in range(x_lower, x_upper):
            for y in range(y_lower, y_upper):
                if (x, y, realz) in self.im2RowIndicies:
                    for row_index in self.im2RowIndicies[(x, y, realz)]:
                        if (row_index not in self.PM.binnedRowIndicies and
                                row_index not in self.PM.restrictedRowIndicies):
                            num_points += 1
                            disp_vals = np_append(disp_vals, self.PM.transformedCP[row_index])
                            disp_cols = np_append(disp_cols, htr(0, 0, 0))

    # reshape
    disp_vals = np_reshape(disp_vals, (num_points, 3))
    disp_cols = np_reshape(disp_cols, (num_points, 3))

    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")
    cm = mpl.colors.LinearSegmentedColormap("my_colormap", disp_cols, 1024)
    result = ax.scatter(disp_vals[:, 0], disp_vals[:, 1], disp_vals[:, 2],
                        edgecolors=disp_cols, c=disp_cols, cmap=cm, marker=".")
    title = str.join(" ", ["Focus at: (", str(px), str(py),
                           str(self.PM.scaleFactor - pz - 1), ")\n", tag])
    plt.title(title)

    if fileName != "":
        fig.set_size_inches(6, 6)
        plt.savefig(fileName, dpi=300)
    elif show:
        plt.show()
    plt.close(fig)
    del fig
def findArrayCenter(self, vals):
    """Find the center of the numpy array vals, return the index of the center"""
    # parameters
    current_val_max = -1
    delta = 0
    bounce_amount = 0.1
    height = 0
    last_val = 0

    working = np_array([])
    final_index = -1

    # sort and normalise between 0 -> 1
    sorted_indices = np_argsort(vals)
    vals_sorted = np_array([vals[i] for i in sorted_indices])
    vals_sorted -= vals_sorted[0]
    if vals_sorted[-1] != 0:
        vals_sorted /= vals_sorted[-1]
    # print vals_sorted

    # run through in one direction
    for val in vals_sorted:
        # calculate delta
        delta = val - last_val
        # reduce the current value according to the delta value
        height = self.reduceViaDelta(height, bounce_amount, delta)
        # bounce the ball up
        height += bounce_amount

        # store the height
        working = np_append(working, height)
        final_index += 1

        # save the last val
        last_val = val

    current_val_max = -1
    height = 0
    last_val = 0

    # print "===W==="
    # print working
    # print "===E==="

    # run through in the reverse direction
    vals_sorted = vals_sorted[::-1]
    for val in vals_sorted:
        if last_val == 0:
            delta = 0
        else:
            delta = last_val - val
        height = self.reduceViaDelta(height, bounce_amount, delta)
        height += bounce_amount
        # add to the old heights
        working[final_index] += height
        final_index -= 1
        last_val = val

    # print working
    # print "==EEE=="

    # find the original index!
    return sorted_indices[np_argmax(working)]
def plot_coverage(self, plt_figure_title: str, title: str = "Coverage of {method_name}\nsamples: n1 = {sample_size1}, n2 = {sample_size2}", theme: plot_styles = "default", colors: Tuple[str,str,str,str,str] = ("gray", "purple", "white", "#b8df96", "green") ): """ Plots the `matplotlib.pyplot` figure given the data from previous coverage calculation and some captions and formatting. """ if self.coverage is None: raise NoCoverageException( "you have to calculate coverage first before plotting it") # this unpacked defaultdict trouble allows for optional formatting placeholders title = title.format(**defaultdict(str, method_name = self.method_name, sample_size1 = self.sample_size1, sample_size2 = self.sample_size2 )) plt.style.use(theme) """ Colorbar range depends on confidence level. Sets vmin to a point 10 times farther from 100% than the confidence for confidence=95% show colorbar from 50% to 100%; for confidence=99% show colorbar from 90% to 100%; for confidence=99.9% show colorbar from 99% to 100%; """ vmin = 100 - ( (100-(self.confidence*100))*10 ) vmax = 100 """ In LinearSegmentedColormap specified color points have to span from 0 to 1, where 0 would correspond to vmin, and 1 to vmax. the 5 specified colors will form a gradient by marking at points below. For confidence=95%: (50, 90, 95, 97.5, 100) For confidence=99%: (90, 98, 99, 99.5, 100) But because the following value is constant, visually, the colorbar itself will always have the same gradient regardless of the given `confidence` value """ nodes = (0.0, 0.8, 0.9, 0.95, 1.0) cmap = LinearSegmentedColormap.from_list("", list(zip(nodes, colors))) cmap.set_under(colors[0]) fig, ax = plt.subplots() fig.canvas.set_window_title(plt_figure_title) """ Would be great if matplotlib supported float128/longdouble. Instead, it converts data to float64 with a warning: "UserWarning: Casting input data from 'float128' to 'float64' for imshow" But! float128 precision could possible be utilized while using float64 in this case. If we were to display not the value (the coverage, 0-100), but the difference between the expected coverage (confidence) and the actual coverage, float64 would do a lot better. This can be done, but some adjustments have to be made to colorbar and labels. 
""" im = ax.imshow(float64(np_array(self.coverage)), cmap=cmap, norm=Normalize(float(vmin), vmax, True) ) # precision of "8" decimal places should be more than enough for colorbar ticks cb = fig.colorbar(im, format=ticker.FuncFormatter(lambda x, pos: (f'%.8f' % x).rstrip('0').rstrip('.'))) # plot a dashed black line over *confidence* point on a colorbar cb.ax.plot([0, 100], [self.confidence*100, self.confidence*100], '_:k') # rewriting autogenerated colorbar ticks by adding one that corresponds to `confidence` colorbar_ticks = cb.ax.get_yticks() colorbar_ticks = np_append(colorbar_ticks, float(self.confidence*100)) cb.set_ticks(colorbar_ticks) plt.title(title, fontsize="large", fontweight="bold") # this is reasonable number of ticks so that tick labels won't overlap max_num_xticks = 7 max_num_yticks = 20 xticks_period = int(np_ceil(len(self.proportions)/max_num_xticks)) yticks_period = int(np_ceil(len(self.proportions)/max_num_yticks)) xperiodic_probs = [float(v) for v in self.proportions[::xticks_period]] yperiodic_probs = [float(v) for v in self.proportions[::yticks_period]] ax.xaxis.set_major_locator(ticker.MultipleLocator(xticks_period)) ax.yaxis.set_major_locator(ticker.MultipleLocator(yticks_period)) ax.tick_params(axis='x', labelsize=8) ax.tick_params(axis='y', labelsize=9) ax.tick_params(top=False) ax.tick_params(right=False) # ax.xaxis.set_tick_params(labeltop=False) # ax.yaxis.set_tick_params(labelright=False) # auto-calculated ticks are fine except for redundant first and last ticks xticks = ax.get_xticks().tolist()[1:-1] yticks = ax.get_yticks().tolist()[1:-1] ax.set_xticks(xticks) ax.set_yticks(yticks) ax.set_xticklabels(xperiodic_probs) ax.set_yticklabels(yperiodic_probs) self.figure = fig return self.figure
def predict_sigle_position(self, sent, pos, prev2_label, prev_label):
    flag = False
    feature_vec, feature_index_list = self.gen_feature(sent, pos, prev2_label, prev_label)
    if self.train_mode and self.drop_out:
        to_block = set(permutation(arange(self.non_fixed_param))[:self.dropout_size])
        block = array([0 if zzz in to_block else 1 for zzz in range(self.pred_size)])
        feature_vec = multiply(feature_vec, block)
    elif self.drop_out:
        # for dropout mode at testing time...
        feature_vec = (1 - self.dropout_rate) * feature_vec
        block = None
    else:
        block = None

    if block is not None:
        print 'block=', block
    if flag:
        print 'pos, char=', pos, sent[pos]
        print 'feat_index_list=', feature_index_list, ';features are:', ' '.join([self.index2word[ind] for ind in feature_index_list])

    c0 = sent[pos] if pos < len(sent) else self.END
    pred_tuple = tuple([self.su_prefix + varient + c0 for varient in self.state_varient])
    if pred_tuple[0] in self.vocab and pred_tuple[1] in self.vocab:
        pass
    else:
        pred_tuple = None
        if self.train_mode:
            print 'Unknown candidate! Should NOT happen during training!'
            assert False
    pred_tuple2 = tuple([self.label0_as_vocab, self.label1_as_vocab])

    softmax_score = None
    if pred_tuple:
        pred_index_list = [self.vocab[pred].index for pred in pred_tuple]
        pred_matrix = self.syn1neg[pred_index_list]
        if block is not None:
            pred_matrix = multiply(block, pred_matrix)
        elif self.drop_out:
            pred_matrix = (1 - self.dropout_rate) * pred_matrix
        raw_score = exp(dot(feature_vec, pred_matrix.T))
        softmax_score = raw_score / sum(raw_score)

    pred_index_list2 = [self.vocab[pred].index for pred in pred_tuple2]
    pred_matrix2 = self.syn1neg[pred_index_list2]
    if block is not None:
        pred_matrix2 = multiply(block, pred_matrix2)
    elif self.drop_out:
        pred_matrix2 = (1 - self.dropout_rate) * pred_matrix2
    raw_score2 = exp(dot(feature_vec, pred_matrix2.T))
    softmax_score2 = raw_score2 / sum(raw_score2)

    if pred_tuple:
        softmax_score2 = np_append(softmax_score2, softmax_score)
        pred_index_list2.extend(pred_index_list)
        pred_matrix2 = np_append(pred_matrix2, pred_matrix, axis=0)
    if flag:
        print 'pred index and item=', pred_index_list2, ' '.join([self.index2word[ind] for ind in pred_index_list2])

    return softmax_score2, feature_index_list, pred_index_list2, feature_vec, pred_matrix2
def outputCoverage(wigChrDict, bedLineL, opL, name_mode, strand, mp):
    label = ''
    valueL = np_array([])
    other = ''
    for lineL in bedLineL:
        if label and label != lineL[3]:
            #---------------get the position with maximum value-------
            if mp:
                sort_indexL = argsort(valueL)
                maxP = sort_indexL[-1]
                diff = abs(sort_indexL[-1] - mid)
                maxV = valueL[maxP]
                for i in xrange(-2, length, -1):
                    if valueL[sort_indexL[i]] == maxV:
                        if diff > abs(sort_indexL[i] - mid):
                            diff = abs(sort_indexL[i] - mid)
                            maxP = sort_indexL[i]
                            maxV = valueL[maxP]
                    else:
                        break
                #----------------------------
                if other:
                    print '%s\t%s\t%s\t%s\t%s\t%s' % (chr, str(maxP + start),
                        str(maxP + start + 1), name, str(maxV), other)
                    other = ''
                else:
                    print '%s\t%s\t%s\t%s\t%s' % (chr, str(maxP + start),
                        str(maxP + start + 1), name, str(maxV))
            #---------------get the position with maximum value-------
            else:
                print "%s\t%s" % (name,
                    '\t'.join([str(op(valueL)) for op in opL]))
            valueL = np_array([])
        #----------output ----------------------------------
        #---------------begin process--------------------
        chr = lineL[0]
        start = int(lineL[1])
        end = int(lineL[2])
        length = start - end - 1  # used for xrange, -1 means include start-end
        mid = (end - start) / 2.0
        label = lineL[3]
        if len(lineL) >= 6:
            other = '\t'.join(lineL[5:])
        if strand:
            strand_in = lineL[5]
            strand_num = 0 if strand_in == '+' else 1
            if name_mode:
                name = ''.join([label, '@', strand_in])
            else:
                name = '\t'.join(lineL)
            valueL = np_append(valueL,
                [wigChrDict.get(i, [0, 0])[strand_num] for i in xrange(start, end)])
        else:
            if name_mode:
                name = label
            else:
                name = '\t'.join(lineL)
            valueL = np_append(valueL,
                [wigChrDict.get(i, 0) for i in xrange(start, end)])
    #---------------------------------------------------------------------
    if label:
        #---------------get the position with maximum value-------
        if mp:
            sort_indexL = argsort(valueL)
            maxP = sort_indexL[-1]
            diff = abs(sort_indexL[-1] - mid)
            maxV = valueL[maxP]
            for i in xrange(-2, length, -1):
                if valueL[sort_indexL[i]] == maxV:
                    if diff > abs(sort_indexL[i] - mid):
                        diff = abs(sort_indexL[i] - mid)
                        maxP = sort_indexL[i]
                else:
                    break
            #----------------------------
            if other:
                print '%s\t%s\t%s\t%s\t%s\t%s' % (chr, str(maxP + start),
                    str(maxP + start + 1), name, str(maxV), other)
            else:
                print '%s\t%s\t%s\t%s\t%s' % (chr, str(maxP + start),
                    str(maxP + start + 1), name, str(maxV))
        #---------------get the position with maximum value-------
        else:
            print "%s\t%s" % (name,
                '\t'.join([str(op(valueL)) for op in opL]))
        valueL = np_array([])
def get_meshsolution(self, output): """Build the MeshSolution objects from the FEA outputs. Parameters ---------- self : MagElmer a MagElmer object output: Output An Output object Returns ------- meshsol: MeshSolution a MeshSolution object with Elmer outputs at every time step """ project_name = self.get_path_save_fea(output) elmermesh_folder = project_name meshsol = MeshSolution(label="Elmer MagnetoDynamics") if not self.is_get_mesh or not self.is_save_FEA: self.get_logger().info( "MagElmer: MeshSolution is not stored by request.") return False meshvtk = MeshVTK(path=elmermesh_folder, name="step_t0002", format="vtu") meshsol.mesh = [meshvtk] result_filename = join(elmermesh_folder, "step_t0002.vtu") meshsolvtu = read(result_filename) #pt_data = meshsolvtu.point_data cell_data = meshsolvtu.cell_data #indices = arange(meshsolvtu.points.shape[0]) indices = arange(meshsolvtu.cells[0].data.shape[0] + meshsolvtu.cells[1].data.shape[0]) Indices = Data1D(name="indice", values=indices, is_components=True) # store_dict = { # "magnetic vector potential": { # "name": "Magnetic Vector Potential A", # "unit": "Wb", # "symbol": "A", # "norm": 1, # }, # "magnetic flux density": { # "name": "Magnetic Flux Density B", # "unit": "T", # "symbol": "B", # "norm": 1, # }, # "magnetic field strength": { # "name": "Magnetic Field H", # "unit": "A/m", # "symbol": "H", # "norm": 1, # }, # "current density": { # "name": "Current Density J", # "unit": "A/mm2", # "symbol": "J", # "norm": 1, # } # } store_dict = { "magnetic flux density e": { "name": "Magnetic Flux Density B", "unit": "T", "symbol": "B", "norm": 1, }, "magnetic vector potential e": { "name": "Magnetic Vector Potential A", "unit": "Wb", "symbol": "A", "norm": 1, }, "magnetic field strength e": { "name": "Magnetic Field H", "unit": "A/m", "symbol": "H", "norm": 1, }, "current density e": { "name": "Current Density J", "unit": "A/mm2", "symbol": "J", "norm": 1, }, } comp_ext = ["x", "y", "z"] sol_list = [] #for key, value in pt_data.items(): for key, value in cell_data.items(): if key in store_dict.keys(): #siz = value.shape[1] siz = value[0].shape[1] if siz > 3: print("Some Message") siz = 3 components = [] comp_name = [] values = np_append(value[0], value[1], axis=0) for i in range(siz): if siz == 1: ext = "" else: ext = comp_ext[i] data = DataTime( name=store_dict[key]["name"] + ext, unit=store_dict[key]["unit"], symbol=store_dict[key]["symbol"] + ext, axes=[Indices], #values=value[:, i], values=values[:, i], normalizations={"ref": store_dict[key]["norm"]}, ) components.append(data) comp_name.append("comp_" + ext) if siz == 1: field = components[0] sol_list.append( SolutionData( field=field, #type_cell="point", type_cell="triangle", label=store_dict[key]["symbol"], )) else: comps = {} for i in range(siz): comps[comp_name[i]] = components[i] field = VectorField(name=store_dict[key]["name"], symbol=store_dict[key]["symbol"], components=comps) sol_list.append( SolutionVector( field=field, #type_cell="point", type_cell="triangle", label=store_dict[key]["symbol"], )) meshsol.solution = sol_list output.mag.meshsolution = meshsol return True
def main():
    options, args = cmdparameter(sys.argv)
    #-----------------------------------
    bed = options.bed
    wig = options.wig
    strand = options.strand
    verbose = options.verbose
    debug = options.debug
    wigDict = readWig(wig, strand)
    opL = options.op.split(',')
    name_mode = int(options.name)
    #-----------------------------------
    opDict = {'mean': mean, 'median': median,
              'max': max, 'min': min, 'sum': sum}
    if bed == '-':
        fh = sys.stdin
    else:
        fh = open(bed)
    #--------------------------------
    if name_mode:
        print "#name\t%s" % '\t'.join(opL)
    else:
        print "#%s" % '\t'.join(opL)
    label = ''
    valueL = np_array([])
    for line in fh:
        lineL = line.strip().split('\t')
        chr = lineL[0]
        start = int(lineL[1])
        end = int(lineL[2])
        innerD = wigDict[chr]
        if label and label != lineL[3]:
            print "%s\t%s" % (name,
                '\t'.join([str(opDict[op](valueL)) for op in opL]))
            valueL = np_array([])
        #---------------------------------------------------------
        label = lineL[3]
        if strand:
            strand_in = lineL[5]
            if name_mode:
                name = label + '@' + strand_in
            else:
                name = line.strip()
            strand_num = 0 if strand_in == '+' else 1
            valueL = np_append(valueL, [innerD.get(i, [0, 0])[strand_num]
                                        for i in xrange(start, end)])
        else:
            if name_mode:
                name = lineL[3]
            else:
                name = line.strip()
            valueL = np_append(valueL, [innerD.get(i, 0)
                                        for i in xrange(start, end)])
    #-------------END reading file----------
    #------for the last name-----------------
    if label:
        print "%s\t%s" % (name,
            '\t'.join([str(opDict[op](valueL)) for op in opL]))
    #----close file handle for files-----
    if bed != '-':
        fh.close()
    #-----------end close fh-----------
    if verbose:
        print >>sys.stderr, \
            "--Successful %s" % strftime(timeformat, localtime())
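# The BED/wig scripts above rely on the same accumulate-then-summarise idiom: collect the
# per-base values of one interval with np_append, then apply each requested operator. A
# minimal, self-contained illustration (made-up coverage values, Python 3 print syntax):
from numpy import array as np_array, append as np_append, mean, median

wig_chr = {100: 3.0, 101: 5.0, 102: 4.0, 110: 1.0}   # position -> coverage; missing positions count as 0
ops = {'mean': mean, 'median': median, 'max': max, 'min': min, 'sum': sum}

start, end = 100, 105                                 # one BED interval
valueL = np_array([])
valueL = np_append(valueL, [wig_chr.get(i, 0) for i in range(start, end)])
print('\t'.join(str(ops[op](valueL)) for op in ('mean', 'max', 'sum')))   # -> 2.4  5.0  12.0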
def load(satscene, calibrate=True, area_extent=None, read_basic_or_detailed='both', **kwargs): """Load MSG SEVIRI High Resolution Wind (HRW) data from hdf5 format. """ # Read config file content conf = ConfigParser() conf.read(os.path.join(CONFIG_PATH, satscene.fullname + ".cfg")) values = { "orbit": satscene.orbit, "satname": satscene.satname, "number": satscene.number, "instrument": satscene.instrument_name, "satellite": satscene.fullname } LOG.info("assume seviri-level5") print "... assume seviri-level5" satscene.add_to_history("hdf5 data read by mpop/nwcsaf_hrw_hdf.py") # end of scan time 4 min after start end_time = satscene.time_slot + datetime.timedelta(minutes=4) # area !!! satscene.area filename = os.path.join( satscene.time_slot.strftime(conf.get("seviri-level5", "dir", raw=True)), satscene.time_slot.strftime( conf.get("seviri-level5", "filename", raw=True)) % values) # define classes before we search for files (in order to return empty class if no file is found) HRW_basic = HRW_class() HRW_basic.detailed = False HRW_basic.date = satscene.time_slot HRW_detailed = HRW_class() HRW_detailed.detailed = True HRW_detailed.date = satscene.time_slot print "... search for file: ", filename filenames = glob(str(filename)) if len(filenames) != 0: if len(filenames) > 1: print "*** Warning, more than 1 datafile found: ", filenames filename = filenames[0] print("... read data from %s" % str(filename)) # create an instant of the HRW_class m_per_s_to_knots = 1.944 ## limit channels to read #hrw_channels=['HRV'] # limit basic or detailed or both #read_basic_or_detailed='detailed' #read_basic_or_detailed='basic' with h5py.File(filename, 'r') as hf: #print hf.attrs.keys() #print hf.attrs.values() region_name = hf.attrs['REGION_NAME'].replace("_", "") print "... read HRW data for region ", region_name LOG.info("... read HRW data for region " + region_name) sat_ID = GP_IDs[int(hf.attrs["GP_SC_ID"])] print "... derived from Meteosat ", sat_ID LOG.info("... derived from Meteosat " + sat_ID) # print('List of arrays in this file: \n', hf.keys()), len(hf.keys()) if len(hf.keys()) == 0: print "*** Warning, empty file ", filename print "" else: for key in hf.keys(): if key[4:9] == "BASIC": if 'read_basic_or_detailed' in locals(): if read_basic_or_detailed.lower() == "detailed": continue HRW_data = HRW_basic # shallow copy elif key[4:12] == "DETAILED": if 'read_basic_or_detailed' in locals(): if read_basic_or_detailed.lower() == "basic": continue HRW_data = HRW_detailed # shallow copy hrw_chn = dict_channel[key[len(key) - 9:]] if 'hrw_channels' in locals(): if hrw_channels != None: if hrw_chn not in hrw_channels: print "... " + hrw_chn + " is not in hrw_channels", hrw_channels print " skip reading this channel" continue # read all data channel = hf.get(key) # print '... 
read wind vectors of channel ', channel.name, hrw_chn # print " i lon lat speed[kn] dir pressure" #for i in range(channel.len()): # print '%3d %10.7f %10.7f %7.2f %7.1f %8.1f' % (channel[i]['wind_id'], channel[i]['lon'], channel[i]['lat'], \ # channel[i]['wind_speed']*m_per_s_to_knots, \ # channel[i]['wind_direction'], channel[i]['pressure']) # create string array with channel names channel_chararray = np_empty(channel.len(), dtype='|S6') channel_chararray[:] = hrw_chn HRW_data.channel = np_append(HRW_data.channel, channel_chararray) HRW_data.wind_id = np_append(HRW_data.wind_id, channel[:]['wind_id']) HRW_data.prev_wind_id = np_append( HRW_data.prev_wind_id, channel[:]['prev_wind_id']) HRW_data.segment_X = np_append(HRW_data.segment_X, channel[:]['segment_X']) HRW_data.segment_Y = np_append(HRW_data.segment_Y, channel[:]['segment_Y']) HRW_data.t_corr_method = np_append( HRW_data.t_corr_method, channel[:]['t_corr_method']) HRW_data.lon = np_append(HRW_data.lon, channel[:]['lon']) HRW_data.lat = np_append(HRW_data.lat, channel[:]['lat']) HRW_data.dlon = np_append(HRW_data.dlon, channel[:]['dlon']) HRW_data.dlat = np_append(HRW_data.dlat, channel[:]['dlat']) HRW_data.pressure = np_append(HRW_data.pressure, channel[:]['pressure']) HRW_data.wind_speed = np_append(HRW_data.wind_speed, channel[:]['wind_speed']) HRW_data.wind_direction = np_append( HRW_data.wind_direction, channel[:]['wind_direction']) HRW_data.temperature = np_append(HRW_data.temperature, channel[:]['temperature']) HRW_data.conf_nwp = np_append(HRW_data.conf_nwp, channel[:]['conf_nwp']) HRW_data.conf_no_nwp = np_append(HRW_data.conf_no_nwp, channel[:]['conf_no_nwp']) HRW_data.t_type = np_append(HRW_data.t_type, channel[:]['t_type']) HRW_data.t_level_method = np_append( HRW_data.t_level_method, channel[:]['t_level_method']) HRW_data.t_winds = np_append(HRW_data.t_winds, channel[:]['t_winds']) HRW_data.t_corr_test = np_append(HRW_data.t_corr_test, channel[:]['t_corr_test']) HRW_data.applied_QI = np_append(HRW_data.applied_QI, channel[:]['applied_QI']) HRW_data.NWP_wind_levels = np_append( HRW_data.NWP_wind_levels, channel[:]['NWP_wind_levels']) HRW_data.num_prev_winds = np_append( HRW_data.num_prev_winds, channel[:]['num_prev_winds']) HRW_data.orographic_index = np_append( HRW_data.orographic_index, channel[:]['orographic_index']) HRW_data.cloud_type = np_append(HRW_data.cloud_type, channel[:]['cloud_type']) HRW_data.wind_channel = np_append( HRW_data.wind_channel, channel[:]['wind_channel']) HRW_data.correlation = np_append(HRW_data.correlation, channel[:]['correlation']) HRW_data.pressure_error = np_append( HRW_data.pressure_error, channel[:]['pressure_error']) # sort according to wind_id inds = HRW_data.wind_id.argsort() HRW_data.subset(inds) # changes HRW_data itself # sorting without conversion to numpy arrays #[e for (wid,pwid) in sorted(zip(HRW_data.wind_id,HRW_data.prev_wind_id))] else: print "*** Error, no file found" print "" sat_ID = "no file" # but we continue the program in order to add an empty channel below ## filter data according to the given optional arguments #n1 = str(HRW_data.channel.size) #HRW_data = HRW_data.filter(**kwargs) #print " apply filters "+' ('+n1+'->'+str(HRW_data.channel.size)+')' chn_name = "HRW" satscene[chn_name].HRW_basic = HRW_basic.filter( **kwargs) # returns new object (deepcopied and filtered) satscene[chn_name].HRW_detailed = HRW_detailed.filter( **kwargs) # returns new object (deepcopied and filtered) satscene[chn_name].info['units'] = 'm/s' satscene[chn_name].info['satname'] = 
'meteosat' satscene[chn_name].info['satnumber'] = sat_ID satscene[chn_name].info['instrument_name'] = 'seviri' satscene[chn_name].info['time'] = satscene.time_slot satscene[chn_name].info['is_calibrated'] = True
        self.assertEqual(z_obj._removed, removed)

    def test_remove_by_indices(self):
        """Test removing a bunch of indices"""
        z_obj = type(self).z_obj
        other_z_obj = deepcopy(z_obj)
        indices = [2, 23]
        z_obj._remove_by_indices(indices)
        for index in indices:
            other_z_obj._remove_by_index(index)
        # Just checking lengths is probably fine
        self.assertEqual(z_obj._lengths, other_z_obj._lengths)

    def test_calculate_zscores(self):
        """Test calculating zscores"""
        pass

    def test_remove_by_zscores(self):
        """Test removing by zscore"""
        pass


if __name__ == '__main__':
    lengths = _mock_data()
    m, s = mean(lengths), std(lengths)
    # Original data set does not contain any outliers by Z-score
    # Add some extreme values
    zlengths = np_append(lengths, [300, 400, 600, 700])
    outlier_vals = [l for l in zlengths if abs(l - m) / s >= 3]
    print(outlier_vals)
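# The __main__ block above checks Z-score based outlier detection by hand. A standalone
# version of that check (computing the statistics on the augmented array, unlike the
# snippet above, which uses the original sample's mean and std) could look like this:
import numpy as np

def zscore_outliers(values, threshold=3.0):
    """Return the values whose absolute Z-score meets the threshold."""
    values = np.asarray(values, dtype=float)
    z = np.abs(values - values.mean()) / values.std()
    return values[z >= threshold]

base = np.random.normal(loc=100, scale=10, size=500)    # synthetic "lengths" without outliers
augmented = np.append(base, [300, 400, 600, 700])        # inject extreme values
print(zscore_outliers(augmented))                         # the injected values are flagged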
def __add__(self, HRW_class2): HRW_new = HRW_class() HRW_new.date = self.date # !!! does not make sense !!! HRW_new.detailed = self.detailed # !!! does not make sense !!! HRW_new.channel = np_append(self.channel, HRW_class2.channel) HRW_new.wind_id = np_append(self.wind_id, HRW_class2.wind_id) HRW_new.prev_wind_id = np_append(self.prev_wind_id, HRW_class2.prev_wind_id) HRW_new.segment_X = np_append(self.segment_X, HRW_class2.segment_X) HRW_new.segment_Y = np_append(self.segment_Y, HRW_class2.segment_Y) HRW_new.t_corr_method = np_append(self.t_corr_method, HRW_class2.t_corr_method) HRW_new.lon = np_append(self.lon, HRW_class2.lon) HRW_new.lat = np_append(self.lat, HRW_class2.lat) HRW_new.dlon = np_append(self.dlon, HRW_class2.dlon) HRW_new.dlat = np_append(self.dlat, HRW_class2.dlat) HRW_new.pressure = np_append(self.pressure, HRW_class2.pressure) HRW_new.wind_speed = np_append(self.wind_speed, HRW_class2.wind_speed) HRW_new.wind_direction = np_append(self.wind_direction, HRW_class2.wind_direction) HRW_new.temperature = np_append(self.temperature, HRW_class2.temperature) HRW_new.conf_nwp = np_append(self.conf_nwp, HRW_class2.conf_nwp) HRW_new.conf_no_nwp = np_append(self.conf_no_nwp, HRW_class2.conf_no_nwp) HRW_new.t_type = np_append(self.t_type, HRW_class2.t_type) HRW_new.t_level_method = np_append(self.t_level_method, HRW_class2.t_level_method) HRW_new.t_winds = np_append(self.t_winds, HRW_class2.t_winds) HRW_new.t_corr_test = np_append(self.t_corr_test, HRW_class2.t_corr_test) HRW_new.applied_QI = np_append(self.applied_QI, HRW_class2.applied_QI) HRW_new.NWP_wind_levels = np_append(self.NWP_wind_levels, HRW_class2.NWP_wind_levels) HRW_new.num_prev_winds = np_append(self.num_prev_winds, HRW_class2.num_prev_winds) HRW_new.orographic_index= np_append(self.orographic_index,HRW_class2.orographic_index) HRW_new.cloud_type = np_append(self.cloud_type, HRW_class2.cloud_type) HRW_new.wind_channel = np_append(self.wind_channel, HRW_class2.wind_channel) HRW_new.correlation = np_append(self.correlation, HRW_class2.correlation) HRW_new.pressure_error = np_append(self.pressure_error, HRW_class2.pressure_error) return HRW_new
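# HRW_class.__add__ above merges two wind data sets by concatenating every attribute with
# np_append. The toy class below (a hypothetical stand-in, not the real HRW_class) shows the
# same field-wise concatenation pattern in isolation:
import numpy as np

class WindSet(object):
    """Every attribute is a flat numpy array; adding two sets concatenates field by field."""
    def __init__(self, wind_id=(), pressure=()):
        self.wind_id = np.array(wind_id)
        self.pressure = np.array(pressure, dtype=float)

    def __add__(self, other):
        merged = WindSet()
        merged.wind_id = np.append(self.wind_id, other.wind_id)
        merged.pressure = np.append(self.pressure, other.pressure)
        return merged

a = WindSet([1, 2], [850.0, 700.0])
b = WindSet([3], [500.0])
merged = a + b
print(merged.wind_id, merged.pressure)   # [1 2 3] [850. 700. 500.]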
def transfer_learning(self, max_iterations, accepted_mean_square_error=0.1, batch_size=5000, learning_rate=1e-4): # Adam optimizer optimizes the parameters (weights and biases) at the learning rate specified output_network_optimizer = Adam(self.output_neural_net.parameters(), lr=learning_rate) # This is the row count difference between that of input and output grids row_difference = self.output_grid_dimension[0] - self.input_grid_dimension[0] # This is the column count difference between that of input and output grids column_difference = self.output_grid_dimension[1] - self.input_grid_dimension[1] # We cycle through iterations of each batch of training the output neural network until the max iteration for _ in range(0, max_iterations): # Training list - each element in the list contains [input state for output neural net, target value] training_list = [] # Shuffling through batches and then calculating the Mean square error for the entire batch for batch in range(0, batch_size): # Creating a matrix to hold the observation state of the input neural network map (0 or 1) output_network_known_state = np_array([[randint(0, 1) for _ in range(0, self.output_grid_dimension[1])] for _ in range(0, self.output_grid_dimension[0])]) # This is used to store the robot and target state of the input neural network input_network_state = np_empty((self.network_robot_count+1)*2, dtype=np_uint8) # Creating positions for all of the robots and target of the input neural network randomly for i in range(0, (self.network_robot_count+1)*2, 2): input_network_state[i] = randint(0, self.input_grid_dimension[0]-1) input_network_state[i+1] = randint(0, self.input_grid_dimension[1]-1) # Creates a backup copy of the input state input_network_state_memory = input_network_state # Sliding the input network state window over different sections of the output network state for i in range(0, row_difference): for j in range(0, column_difference): # Creating a matrix to hold the observation state of the input neural network map (0 or 1) input_network_known_state = output_network_known_state[i:(i+self.input_grid_dimension[0]), j:(j+self.input_grid_dimension[1])] # This is used to store the robot and target state of the output neural network output_network_state = np_empty((self.network_robot_count+1)*2, dtype=np_uint8) # Extending the input position states across the moving window within the output grid dimensions for k in range(0, (self.network_robot_count+1)*2, 2): output_network_state[k] = input_network_state[k]+i output_network_state[k+1] = input_network_state[k+1]+j # Now we flatten data in the input network state, a 2-D matrix to 1-D and append input_network_state = np_append(input_network_state, np_ndarray.flatten(input_network_known_state)) # Now we flatten data in the output network state, a 2-D matrix to 1-D and append output_network_state = np_append(output_network_state, np_ndarray.flatten(output_network_known_state)) # Looping through the 4 possible actions each robot can take and then appending them to state for k in range(0, 4): # Adding an action completes the input state for the input neural network input_network_state_tensor = Tensor(np_append(input_network_state, k)) # Adding an action completes the input state for the output neural network output_network_state_tensor = Tensor(np_append(output_network_state, k)) # Getting the Q value predicted by the input neural network for the given state input_network_predicted_value = self.input_neural_net.forward(input_network_state_tensor) # Now we know the value the output 
neural network is to be trained towards for its given # input. Add both of them to the training list so that batch training can occur later training_list.append([output_network_state_tensor, input_network_predicted_value]) # Restoring the input state from memory input_network_state = input_network_state_memory # Shuffling the training data before feeding it in for training shuffle(training_list) # Initializing the current MSE loss sum_square_error = 0.0 # Using the batch of state and target data for training the output neural network for batch in range(0, batch_size): # Obtaining the completed input states for the output neural network output_network_state_tensor = training_list[batch][0] # Obtaining the target predictions that the output neural network should be trained towards predicted_target_value = training_list[batch][1] # Getting the Q value predicted by the output neural network for the given input state output_network_predicted_value = self.output_neural_net.forward(output_network_state_tensor) # Adding the current square error to the sum of square errors sum_square_error += pow((output_network_predicted_value - predicted_target_value), 2) # Represents the function that can calculate training error training_error_function = MSELoss() # Our goal is to reduce the mean square error loss between the target prediction and that of network training_error = training_error_function(output_network_predicted_value, predicted_target_value) # Clears the gradients of all optimized torch tensors output_network_optimizer.zero_grad() # During the backwards pass, gradients from each replica are summed into the original module training_error.backward() # Training actually happens here. Performs a single optimization step of weights and biases output_network_optimizer.step() # Dividing the sum of square errors by the batch size to get the mean square error current_mean_square_error = sum_square_error/batch_size print(current_mean_square_error) # Checks if the MSE for the entire batch is within acceptable levels and then returns the output neural net if current_mean_square_error <= accepted_mean_square_error: # we return a list where true indicates that we achieved the accepted mean square error criteria return [self.output_neural_net, True] # Failed to completely train the output neural network. Return a list with second element false to indicate this return [self.output_neural_net, False]
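# Stripped of the window sliding and state bookkeeping, the loop above is an ordinary
# teacher-student regression: a frozen "input" network provides target Q values and the
# "output" network is fitted to them with Adam and an MSE loss. A minimal sketch (with
# hypothetical 4-feature states and linear layers in place of the real Q networks, and
# batched updates instead of the per-sample updates used above):
import torch
from torch import nn
from torch.optim import Adam

teacher = nn.Linear(4, 1)                    # stands in for the trained input network
student = nn.Linear(4, 1)                    # stands in for the output network being trained
optimizer = Adam(student.parameters(), lr=1e-4)
loss_fn = nn.MSELoss()

states = torch.rand(5000, 4)                 # batch of encoded (state, action) inputs
with torch.no_grad():
    targets = teacher(states)                # values the student should reproduce

for _ in range(100):
    loss = loss_fn(student(states), targets)
    optimizer.zero_grad()                    # clear gradients from the previous step
    loss.backward()                          # accumulate new gradients
    optimizer.step()                         # one optimisation step over weights and biases
    if loss.item() <= 0.1:                   # stop once the accepted mean square error is reached
        break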