Example #1
    def pca(self, data_matrix):
        """Perform PCA.

        Returns the principal components and the variance
        captured by each component.

        Parameters
        ----------
        data_matrix : list of lists
          List of tetranucleotide signatures
        """

        cols = len(data_matrix[0])
        data_matrix = np_reshape(np_array(data_matrix), (len(data_matrix), cols))

        pca = PCA()
        pc, variance = pca.pca_matrix(data_matrix, 3, bCenter=True, bScale=False)

        # ensure the pc matrix has at least 3 components (columns)
        if pc.shape[1] == 1:
            pc = np_append(pc, np_zeros((pc.shape[0], 2)), 1)
            variance = np_append(variance[0], np_ones(2))
        elif pc.shape[1] == 2:
            pc = np_append(pc, np_zeros((pc.shape[0], 1)), 1)
            variance = np_append(variance[0:2], np_ones(1))

        return pc, variance
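A minimal, self-contained sketch of the padding idiom above (hypothetical data; numpy imported under the same np_* aliases the example assumes):

from numpy import array as np_array, append as np_append, zeros as np_zeros, ones as np_ones

pc = np_array([[0.1], [0.2], [0.3]])  # pretend PCA returned a single component
variance = np_array([1.0])
if pc.shape[1] == 1:
    pc = np_append(pc, np_zeros((pc.shape[0], 2)), 1)  # pad to 3 columns
    variance = np_append(variance[0], np_ones(2))
print(pc.shape)  # (3, 3)
print(variance)  # [1. 1. 1.]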
Example #2
    def pca(self, data_matrix):
        """Perform PCA.

        Returns the principal components and the variance
        captured by each component.

        Parameters
        ----------
        data_matrix : list of lists
          List of tetranucleotide signatures
        """

        cols = len(data_matrix[0])
        data_matrix = np_reshape(np_array(data_matrix),
                                 (len(data_matrix), cols))

        pca = PCA()
        pc, variance = pca.pca_matrix(data_matrix,
                                      3,
                                      bCenter=True,
                                      bScale=False)

        # ensure the pc matrix has at least 3 components (columns)
        if pc.shape[1] == 1:
            pc = np_append(pc, np_zeros((pc.shape[0], 2)), 1)
            variance = np_append(variance[0], np_ones(2))
        elif pc.shape[1] == 2:
            pc = np_append(pc, np_zeros((pc.shape[0], 1)), 1)
            variance = np_append(variance[0:2], np_ones(1))

        return pc, variance
Example #3
def unbalanced_loads(all_weight):
    """
    车辆偏载统计
    :param all_weight:
    :return:
    """
    unbalanced_loads_coe = float(conf.unbalanced_loads_coe())  # (空载时的)偏载系数

    # Left/right wheel-weight totals for each carriage
    wheel_weight = all_weight[0]
    wheel_weight_arr = wheel_weight.reshape((-1, 4, 2))
    wheel_weight_arr_trans = wheel_weight_arr.transpose((1, 0, 2))
    wheel_weight_sum = sum(wheel_weight_arr_trans)

    # Left/right axle-weight totals for each carriage
    axle_weight = all_weight[1]
    axle_weight_arr = axle_weight.reshape((-1, 4))
    axle_weight_arr_trans = axle_weight_arr.transpose((1, 0))
    axle_weight_arr_trans_sum = sum(axle_weight_arr_trans)
    axle_weight_arr_trans_half = axle_weight_arr_trans_sum / 2
    axle_weight_new_sum = np_append(axle_weight_arr_trans_half, axle_weight_arr_trans_half)
    axle_weight_sum = around(axle_weight_new_sum.reshape((2, -1)).transpose((1, 0)), 4)

    # Left/right bogie-weight totals for each carriage
    bogie_weight = all_weight[2]
    bogie_weight_arr = bogie_weight.reshape((-1, 2)).transpose((1, 0))
    bogie_weight_arr_sum = sum(bogie_weight_arr)
    bogie_weight_arr_half = bogie_weight_arr_sum / 2
    bogie_weight_new_sum = np_append(bogie_weight_arr_half, bogie_weight_arr_half)
    bogie_weight_sum = around(bogie_weight_new_sum.reshape((2, -1)).transpose((1, 0)), 4)

    # Left/right carriage-body weight totals for each carriage
    carriage_weight = all_weight[3]
    carriage_weight_arr_half = carriage_weight / 2
    carriage_weight_new_sum = np_append(carriage_weight_arr_half, carriage_weight_arr_half)
    carriage_weight_sum = around(carriage_weight_new_sum.reshape((2, -1)).transpose((1, 0)), 4)

    # Left/right weight totals for the whole train
    total_car_weight = wheel_weight_sum + axle_weight_sum + bogie_weight_sum + carriage_weight_sum

    # Unbalanced-load check for each carriage
    diff_set = []
    is_unbalanced_loads = []
    for each_carriage_weight in total_car_weight:
        left_carriage_weight = each_carriage_weight[0]
        right_carriage_weight = each_carriage_weight[1]
        diff = round(abs(left_carriage_weight - right_carriage_weight), 4)
        mean_each_carriage_weight = round((left_carriage_weight + right_carriage_weight) / 2, 4)
        each_carriage_coe = unbalanced_loads_coe * sum(each_carriage_weight) / 38
        diff_set.append(diff)

        if mean_each_carriage_weight != 0:
            if diff > each_carriage_coe:  # unbalanced: left/right difference exceeds the threshold
                is_unbalanced_loads.append(1)  # 1 means unbalanced
            else:
                is_unbalanced_loads.append(0)  # 0 means balanced
        else:
            is_unbalanced_loads.append(0)
    return is_unbalanced_loads
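A hypothetical sketch of the per-carriage check performed above: a carriage is flagged when its left/right difference exceeds a load-dependent threshold (the /38 divisor is taken from the example as-is):

from numpy import array

total_car_weight = array([[10.1, 10.0], [11.5, 8.5]])  # [left, right] per carriage
unbalanced_loads_coe = 0.5  # assumed coefficient
flags = []
for left, right in total_car_weight:
    threshold = unbalanced_loads_coe * (left + right) / 38
    flags.append(1 if abs(left - right) > threshold else 0)
print(flags)  # [0, 1] -> only the second carriage exceeds its threshold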
Example #4
    def _rqa_lv_dist_min(self, th, m_min, freq_dist, ret_sum=0):
        l = np_array([])
        p = np_array([])
        for item in freq_dist(th).items():
            if float(item[0]) >= m_min:
                if item[1]:
                    l = np_append(l, item[0])
                    p = np_append(p, item[1])

        if ret_sum:
            return p.sum()
        else:
            return (l, p)
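The same filtering can be sketched standalone; here freq_dist is replaced by a plain dict of line lengths to counts (hypothetical values):

from numpy import array as np_array, append as np_append

freq_dist = {1: 5, 2: 0, 3: 7, 4: 2}  # length -> count
m_min = 2
l, p = np_array([]), np_array([])
for length, count in freq_dist.items():
    if length >= m_min and count:
        l = np_append(l, length)
        p = np_append(p, count)
print(l, p, p.sum())  # [3. 4.] [7. 2.] 9.0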
Example #5
def init_ols_balanced(X, y, nn, random_state):

    n_samples = y.shape[0]
    n_neurons = nn.get_number_last_hidden_neurons()
    partial_semantics = zeros((n_samples, n_neurons))
    for i, hidden_neuron in enumerate(nn.hidden_layers[-1]):
        partial_semantics[:, i] = hidden_neuron.get_semantics()

    for output_index, output_neuron in enumerate(nn.output_layer):
        output_y = y[:, output_index]
        output_y_class_1_indices = where(output_y == 1)[0]
        output_y_class_1_count = output_y_class_1_indices.shape[0]
        output_y_class_0_count = n_samples - output_y_class_1_count

        sample_weights = ones(n_samples)
        class_1_weight = output_y_class_0_count / output_y_class_1_count
        sample_weights[output_y_class_1_indices] = class_1_weight

        reg = LinearRegression().fit(partial_semantics, output_y,
                                     sample_weights)
        optimal_weights = np_append(reg.coef_.T, reg.intercept_)

        # Update connections with the learning step value:
        for i in range(n_neurons):
            output_neuron.input_connections[-n_neurons + i].weight = optimal_weights[i]

        output_neuron.increment_bias(optimal_weights[-1])
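A sketch of the weight-extraction step above on synthetic data: fit a LinearRegression and pack its coefficients and intercept into a single vector with np_append (nothing here comes from the original project):

from numpy import append as np_append
from numpy.random import default_rng
from sklearn.linear_model import LinearRegression

rng = default_rng(0)
X = rng.normal(size=(50, 3))
y = X @ [1.0, -2.0, 0.5] + 3.0
reg = LinearRegression().fit(X, y)
optimal_weights = np_append(reg.coef_.T, reg.intercept_)
print(optimal_weights.round(2))  # approximately [ 1.  -2.   0.5  3. ]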
Example #6
 def setUpClass(cls):
     # Mock uniform distance objects and outliers
     cls.length_dict = {}
     for i in range(4):
         label = "group" + str(i + 1)
         cls.length_dict[label] = data
     outliers = [300, 400, 600, 700]
     # Make ScrollSeq objects and add outliers
     cls.seq_dict = {}
     cls.len_list = []
     group_counter = 0
     for group_id, ((k, v), new_val) in enumerate(
             zip(
                 cls.length_dict.items(),
                 outliers,
             )):
         length_list = list(np_append(v, [new_val]))
         for val_id, length in enumerate(length_list):
             cls.len_list.append(int(length))
             seq_obj = MockSeq(
                 obj_id="{}.{}".format(group_id, val_id),
                 value=int(length),
             )
             try:
                 cls.seq_dict[k].append(seq_obj)
             except KeyError:
                 cls.seq_dict[k] = []
                 cls.seq_dict[k].append(seq_obj)
     # Finally, set up a single class instance
     cls.z_obj = Filter(cls.seq_dict, filter_method='zscore')
Example #7
def ModMatInv(key, alpha_len, matrix_len):
    key_ident = np_append(key, identity(matrix_len), axis=1)
    key_det = round(det(key)) % alpha_len
    key_det_ext = ExtendedEuclidean(alpha_len, key_det)
    inverted_key = ModMatInit(key, key_ident, key_det_ext, alpha_len,
                              matrix_len)
    return inverted_key
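A minimal sketch of the augmentation step above: np_append with axis=1 glues an identity matrix onto the key, the usual setup for Gauss-Jordan inversion (toy 2x2 key):

from numpy import append as np_append, identity, array

key = array([[3., 3.], [2., 5.]])
key_ident = np_append(key, identity(2), axis=1)
print(key_ident)
# [[3. 3. 1. 0.]
#  [2. 5. 0. 1.]]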
Example #8
def EncryptDecryptHill_German(text_ascii_30, key, matrix_len, alpha_len):
    total_len = ceil(len(text_ascii_30) / matrix_len)
    text_key = zeros((1, total_len * matrix_len - len(text_ascii_30)))
    text_key = np_append(text_ascii_30, text_key)
    text_key = text_key.reshape(matrix_len, total_len, order='F')
    enc_dec_matrix = dot(text_key.T, key) % alpha_len
    total_len = total_len * matrix_len
    enc_dec_matrix = enc_dec_matrix.T.reshape(1, total_len, order='F')
    return [int(i) for i in array(enc_dec_matrix)[0].tolist()]
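A sketch of the padding step above with hypothetical inputs: zeros extend the message to a multiple of the key size, and the Fortran-order reshape fills column by column:

from math import ceil
from numpy import append as np_append, zeros

text = [7, 4, 11, 11, 14]  # encoded message
matrix_len = 3
total_len = ceil(len(text) / matrix_len)
padded = np_append(text, zeros((1, total_len * matrix_len - len(text))))
print(padded)  # [ 7.  4. 11. 11. 14.  0.]
print(padded.reshape(matrix_len, total_len, order='F'))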
Example #9
    def gen_feature(self, sent, pos, prev2_label, prev_label):
        """
        get features vector from self.syn0 matrix
        get words index
        :param sent:
        :param pos:
        :param prev2_label:
        :param prev_label:
        :return:
        """
        u, u_1, u_2, u1, u2, b_1, b_2, b1, b2 = self.gen_unigram_bigram(
            sent, pos)

        ngram_feature_list = [u, u_1, u_2, u1, u2
                              ] + [''.join(b) for b in [b_1, b_2, b1, b2]]

        if self.no_bigram_feature:
            ngram_feature_list = ngram_feature_list[:-4]

        if self.no_unigram_feature:
            ngram_feature_list = ngram_feature_list[5:]

        state_feature_list = [
            self.su_prefix + self.state_varient[int(item[0])] + item[1]
            for item in zip([prev2_label, prev_label], [u_2, u_1])
        ]

        if not self.no_sb_feature:
            state_feature_list.append(
                self.sb_prefix + self.state_varient[int(prev_label)] +
                ''.join(b_1))  # change the bigram state def

        if self.no_action_feature:
            feat_list = ngram_feature_list
        else:
            feat_list = ngram_feature_list + state_feature_list

        feature_index_list = map(self.word2index, feat_list)
        feature_vec = self.syn0[feature_index_list].ravel()

        if self.no_binary_action_feature:
            feat_vec = feature_vec
        else:
            feat_vec = np_append(
                feature_vec, asanyarray([
                    float(prev2_label), float(prev_label)
                ]))  ###########  !!!!! tmp block the previous state feature..

        # print 'feature shape', feat_vec.shape
        # print 'feature_shape', feature_vec.shape
        # feat_vec=np_append(feature_vec, asanyarray([float(prev2_label), float(prev_label)]))  ###########  !!!!! tmp block the previous state feature..
        # print 'feature shape', feat_vec.shape

        return feat_vec, feature_index_list
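A standalone sketch of the final step above: embedding rows are flattened into one feature vector and two scalar state features are appended (toy matrix and indices, not the real model):

from numpy import append as np_append, asanyarray
from numpy.random import default_rng

syn0 = default_rng(0).normal(size=(10, 4))  # toy embedding matrix
feature_index_list = [2, 5, 7]
feature_vec = syn0[feature_index_list].ravel()  # shape (12,)
prev2_label, prev_label = 0, 1
feat_vec = np_append(feature_vec, asanyarray([float(prev2_label), float(prev_label)]))
print(feat_vec.shape)  # (14,)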
Example #10
def naive_search_with_np(n):
    prime_numbers = np_array([2])
    i = prime_numbers[0]
    while len(prime_numbers) != n:
        i += 1
        i_is_prime = True
        for d in np_arange(2, i):
            if i % d == 0:
                i_is_prime = False
                break
        if i_is_prime:
            prime_numbers = np_append(prime_numbers, i)

    print('Bytes:', getsizeof(prime_numbers))
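Worth noting: np_append copies the whole array on every call, so growing prime_numbers this way is quadratic in n. An equivalent sketch that collects into a list and converts once at the end:

from sys import getsizeof
from numpy import array as np_array

def naive_search_with_list(n):
    primes = [2]
    i = 2
    while len(primes) != n:
        i += 1
        if all(i % d != 0 for d in range(2, i)):
            primes.append(i)
    prime_numbers = np_array(primes)
    print('Bytes:', getsizeof(prime_numbers))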
Example #11
def updateAlgoData():
    """
    Update from raw data into FPs directly used by location.fixPosWLAN() from WppDB(wpp_clusterid, wpp_cfps).
    1) Retrieve latest incremental rawdata(csv) from remote FTP server(hosted by FPP).
    2) Decompress bzip2, import CSV into wpp_uprecsinfo with its ver_uprecs, Update ver_uprecs in wpp_uprecsver.
    3) Incr clustering inserted rawdata for direct algo use.
    """
    dbips = DB_OFFLINE
    for dbip in dbips:
        dbsvr = dbsvrs[dbip]
        wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
        ver_wpp = wppdb.getRawdataVersion()
        # Sync rawdata into wpp_uprecsinfo from remote FTP server.
        print 'Probing rawdata version > [%s]' % ver_wpp
        vers_fpp,localbzs = syncFtpUprecs(FTPCFG, ver_wpp)
        if not vers_fpp: print 'Not found!'; continue
        else: print 'Found new vers: %s' % vers_fpp
        # Handle each bzip2 file.
        alerts = {'vers':[], 'details':''}
        tab_rd = 'wpp_uprecsinfo'
        for bzfile in localbzs:
            # Filter out the ver_uprecs info from the name of each bzip file.
            ver_bzfile = bzfile.split('_')[-1].split('.')[0]
            # Update ver_uprecs in wpp_uprecsver to ver_bzfile.
            wppdb.setRawdataVersion(ver_bzfile)
            print '%s\nUpdate ver_uprecs -> [%s]' % ('-'*40, ver_bzfile)
            # Decompress bzip2.
            sys.stdout.write('Decompress & append rawdata ... ')
            csvdat = csv.reader( BZ2File(bzfile) )
            try:
                indat = np_array([ line for line in csvdat ])
            except csv.Error, e:
                sys.exit('\n\nERROR: %s, line %d: %s!\n' % (bzfile, csvdat.line_num, e))
            # Append ver_uprecs(auto-incr),area_ok(0),area_try(0) to raw 16-col fp.
            append_info = np_array([ [ver_bzfile,0,0] for i in xrange(len(indat)) ])
            indat_withvers = np_append(indat, append_info, axis=1).tolist(); print 'Done'
            # Import csv into wpp_uprecsinfo.
            try:
                sys.stdout.write('Import rawdata: ')
                wppdb.insertMany(table_name=tab_rd, indat=indat_withvers, verb=True)
            except Exception, e:
                _lineno = sys._getframe().f_lineno
                _file = sys._getframe().f_code.co_filename
                alerts['details'] += '\n[ver:%s][%s:%s]: %s' % \
                        (ver_bzfile, _file, _lineno, str(e).replace('\n', ' '))
                alerts['vers'].append(ver_bzfile)
                print 'ERROR: Insert Rawdata Failed!'
                continue
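The column-append step above can be sketched in isolation: axis=1 attaches the same [version, area_ok, area_try] triple to every raw row (hypothetical data; note that numpy promotes everything to strings here):

from numpy import array as np_array, append as np_append

indat = np_array([['a', '1'], ['b', '2']])
ver_bzfile = '42'
append_info = np_array([[ver_bzfile, 0, 0] for _ in range(len(indat))])
indat_withvers = np_append(indat, append_info, axis=1).tolist()
print(indat_withvers)  # [['a', '1', '42', '0', '0'], ['b', '2', '42', '0', '0']]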
Example #12
    def gen_feature(self, sent, pos, prev2_label, prev_label):
        """
        get features vector from self.syn0 matrix
        get words index
        :param sent:
        :param pos:
        :param prev2_label:
        :param prev_label:
        :return:
        """
        u, u_1, u_2, u1, u2, b_1, b_2, b1, b2 = self.gen_unigram_bigram(sent, pos)

        ngram_feature_list = [u, u_1, u_2, u1, u2] + [''.join(b) for b in [b_1, b_2, b1, b2]]

        if self.no_bigram_feature:
            ngram_feature_list = ngram_feature_list[:-4]

        if self.no_unigram_feature:
            ngram_feature_list = ngram_feature_list[5:]

        state_feature_list = [self.su_prefix + self.state_varient[int(item[0])] + item[1] for item in
                              zip([prev2_label, prev_label], [u_2, u_1])]

        if not self.no_sb_feature:
            state_feature_list.append(
                    self.sb_prefix + self.state_varient[int(prev_label)] + ''.join(b_1))  # change the bigram state def

        if self.no_action_feature:
            feat_list = ngram_feature_list
        else:
            feat_list = ngram_feature_list + state_feature_list

        feature_index_list = map(self.word2index, feat_list)
        feature_vec = self.syn0[feature_index_list].ravel()

        if self.no_binary_action_feature:
            feat_vec = feature_vec
        else:
            feat_vec = np_append(feature_vec, asanyarray(
                    [float(prev2_label), float(prev_label)]))  ###########  !!!!! tmp block the previous state feature..

        # print 'feature shape', feat_vec.shape
        # print 'feature_shape', feature_vec.shape
        # feat_vec=np_append(feature_vec, asanyarray([float(prev2_label), float(prev_label)]))  ###########  !!!!! tmp block the previous state feature..
        # print 'feature shape', feat_vec.shape

        return feat_vec, feature_index_list
Example #13
def mutation_ols_ls_global_margin(X, y, nn, n_added_neurons, random_state):

    hidden_semantics = build_hidden_semantics(nn, n_added_neurons, y.shape[0])

    y_prob = nn.get_predictions().copy()
    softmax(y_prob)
    ce_loss = -xlogy(y, y_prob)
    m = ce_loss.max(axis=1)
    #===========================================================================
    # am = ce_loss.argmax(axis=1)
    #===========================================================================

    sample_weights = 1 + m

    #===============================================================================
    #     cut_off = 75
    #     cut_off = percentile(m, cut_off)
    #     above = where(m > cut_off)[0]
    #     above_count = above.shape[0]
    #     below_or_equal_count = n_samples - above_count
    #     #===========================================================================
    #     # below_or_equal = where(m <= cut_off)[0]
    #     # below_or_equal_count = below_or_equal.shape[0]
    #     #===========================================================================
    #
    #     sample_weights = ones(n_samples)
    #     sample_weights[above] = below_or_equal_count / above_count
    #===============================================================================

    for output_index, output_neuron in enumerate(nn.output_layer):
        output_delta_y = y[:, output_index] - nn.get_predictions()[:, output_index]
        reg = LinearRegression().fit(hidden_semantics, output_delta_y,
                                     sample_weights)
        optimal_weights = np_append(reg.coef_.T, reg.intercept_)
        #=======================================================================
        # print('\n\toptimal_weights [min, mean, max]: [%.5f, %.5f, %.5f]' % (optimal_weights.min(), optimal_weights.mean(), optimal_weights.max()))
        #=======================================================================

        # Update connections with the learning step value:
        for i in range(n_added_neurons):
            output_neuron.input_connections[-n_added_neurons + i].weight = optimal_weights[i]

        output_neuron.increment_bias(optimal_weights[-1])
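A sketch of the weighting above on synthetic arrays: per-sample cross-entropy against the softmaxed predictions, with weight 1 plus the row-wise maximum loss:

from numpy import array
from scipy.special import xlogy

y = array([[1., 0.], [0., 1.]])
y_prob = array([[0.8, 0.2], [0.4, 0.6]])  # assume already softmaxed
ce_loss = -xlogy(y, y_prob)
sample_weights = 1 + ce_loss.max(axis=1)
print(sample_weights.round(3))  # [1.223 1.511]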
Example #14
    def gen_feature(self, sent, pos, prev2_label, prev_label):

        c0, c1, c2, c3, c4, b1, b2, b3, b4 = self.gen_uni_gram_bigram(
            sent, pos)

        ngram_feature_list = [c0, c1, c2, c3, c4
                              ] + [''.join(b) for b in [b1, b2, b3, b4]]

        if self.no_bigram_feature:
            ngram_feature_list = ngram_feature_list[:-4]

        if self.no_unigram_feature:
            ngram_feature_list = ngram_feature_list[5:]

        state_feature_list = [
            self.su_prefix + self.state_varient[int(item[0])] + item[1]
            for item in zip([prev2_label, prev_label], [c2, c1])
        ]

        if not self.no_sb_feature:
            state_feature_list.append(
                self.sb_prefix + self.state_varient[int(prev_label)] +
                ''.join(b1))  #change the bigram state def

        if self.no_action_feature:
            feat_list = ngram_feature_list
        else:
            feat_list = ngram_feature_list + state_feature_list

        feature_index_list = map(self.word2index, feat_list)
        feature_vec = self.syn0[feature_index_list].ravel()

        if self.no_binary_action_feature:
            feat_vec = feature_vec
        else:
            feat_vec = np_append(
                feature_vec, asanyarray([
                    float(prev2_label), float(prev_label)
                ]))  ###########  !!!!! tmp block the previous state feature..

        #print 'feature shape', feat_vec.shape
        #print 'feature_shape', feature_vec.shape
        #feat_vec=np_append(feature_vec, asanyarray([float(prev2_label), float(prev_label)]))  ###########  !!!!! tmp block the previous state feature..
        #print 'feature shape', feat_vec.shape

        return feat_vec, feature_index_list
Example #15
def wheel_weight_algorithm(ww_wheel_value):
    try:
        # axle_wheel_value = ww_wheel_value.transpose((1, 0))
        # empirical value: 742.585 corresponds to 2.1 t ==> weight coefficient: 2.1 / 742.585
        # sum_left_right = 742.585
        # sum_left_right = 789.85
        sum_left_right = round(
            sum(sum(ww_wheel_value)) / len(ww_wheel_value),
            12)  # a fixed value would pin the weight exactly; otherwise it can only be calibrated against 2.1 t
        wheelset_standard_weight = standard_left_weight + standard_right_weight + standard_axle_weight
        weight_coefficient = wheelset_standard_weight / sum_left_right
        left_wheel_coefficient = standard_left_weight / (
            standard_left_weight + standard_axle_weight / 2)
        right_wheel_coefficient = standard_right_weight / (
            standard_right_weight + standard_axle_weight / 2)
        axle_coefficient = standard_axle_weight / wheelset_standard_weight

        wheel_axle_weight = around(ww_wheel_value * weight_coefficient, 4)
        wheel_axle_weight_tran = wheel_axle_weight.transpose((1, 0))

        # wheelset weight
        wheelset_weight = around(sum(wheel_axle_weight_tran), 4)

        # axle weight
        axle_weight = around(wheelset_weight * axle_coefficient, 3)

        # wheel weights
        left_wheel_weight = around(
            wheel_axle_weight_tran[0] * left_wheel_coefficient, 3)
        right_wheel_weight = around(
            wheel_axle_weight_tran[1] * right_wheel_coefficient, 3)
        wheel_weight = np_append(left_wheel_weight,
                                 right_wheel_weight).reshape(
                                     (2, -1)).transpose((1, 0))

        return [wheel_weight, axle_weight, wheelset_weight]
    except Exception as e:
        info(e)
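The pairing idiom used above (and in Example #3) in isolation: np_append plus reshape((2, -1)) plus transpose turns separate left/right vectors into (N, 2) rows of [left, right] (made-up weights):

from numpy import append as np_append, array

left = array([1.05, 1.10, 0.98])
right = array([1.02, 1.08, 1.01])
pairs = np_append(left, right).reshape((2, -1)).transpose((1, 0))
print(pairs)
# [[1.05 1.02]
#  [1.1  1.08]
#  [0.98 1.01]]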
Example #16
    def gen_feature(self, sent, pos, prev2_label, prev_label):

        c0, c1, c2, c3, c4, b1, b2, b3, b4 = self.gen_uni_gram_bigram(sent, pos)

        ngram_feature_list = [c0, c1, c2, c3, c4] + [''.join(b) for b in [b1, b2, b3, b4]]

        if self.no_bigram_feature:
            ngram_feature_list = ngram_feature_list[:-4]

        if self.no_unigram_feature:
            ngram_feature_list = ngram_feature_list[5:]

        state_feature_list = [self.su_prefix + self.state_varient[int(item[0])] + item[1]
                              for item in zip([prev2_label, prev_label], [c2, c1])]

        if not self.no_sb_feature:
            state_feature_list.append(
                self.sb_prefix + self.state_varient[int(prev_label)] + ''.join(b1))  # change the bigram state def

        if self.no_action_feature:
            feat_list = ngram_feature_list
        else:
            feat_list = ngram_feature_list + state_feature_list

        feature_index_list = map(self.word2index, feat_list)
        feature_vec = self.syn0[feature_index_list].ravel()

        if self.no_binary_action_feature:
            feat_vec = feature_vec
        else:
            feat_vec = np_append(feature_vec, asanyarray([float(prev2_label), float(prev_label)]))

        #print 'feature shape', feat_vec.shape
        #print 'feature_shape', feature_vec.shape
        #feat_vec=np_append(feature_vec, asanyarray([float(prev2_label), float(prev_label)]))  ###########  !!!!! tmp block the previous state feature..
        #print 'feature shape', feat_vec.shape

        return feat_vec, feature_index_list
Example #17
def load(satscene, calibrate=True, area_extent=None, read_basic_or_detailed='both', **kwargs):
    """Load MSG SEVIRI High Resolution Wind (HRW) data from hdf5 format.
    """

    # Read config file content
    conf = ConfigParser()
    conf.read(os.path.join(CONFIG_PATH, satscene.fullname + ".cfg"))
    values = {"orbit": satscene.orbit,
    "satname": satscene.satname,
    "number": satscene.number,
    "instrument": satscene.instrument_name,
    "satellite": satscene.fullname
    }

    LOG.info("assume seviri-level5")
    print "... assume seviri-level5"

    satscene.add_to_history("hdf5 data read by mpop/nwcsaf_hrw_hdf.py")

    # end of scan time 4 min after start 
    end_time = satscene.time_slot + datetime.timedelta(minutes=4)

    # area !!! satscene.area

    filename = os.path.join( satscene.time_slot.strftime(conf.get("seviri-level5", "dir", raw=True)),
                             satscene.time_slot.strftime(conf.get("seviri-level5", "filename", raw=True)) % values )

    # define classes before we search for files (in order to return empty class if no file is found)
    HRW_basic             = HRW_class()
    HRW_basic.detailed    = False 
    HRW_basic.date        = satscene.time_slot
    HRW_detailed          = HRW_class()
    HRW_detailed.detailed = True
    HRW_detailed.date     = satscene.time_slot

    print "... search for file: ", filename
    filenames=glob(str(filename))

    if len(filenames) != 0:

        if len(filenames) > 1:
            print "*** Warning, more than 1 datafile found: ", filenames 

        filename = filenames[0]
        print("... read data from %s" % str(filename))

        # create an instant of the HRW_class
        m_per_s_to_knots = 1.944

        ## limit channels to read 
        #hrw_channels=['HRV']
        # limit basic or detailed or both
        #read_basic_or_detailed='detailed'
        #read_basic_or_detailed='basic'


        with h5py.File(filename,'r') as hf:

            #print hf.attrs.keys()
            #print hf.attrs.values()

            region_name = hf.attrs['REGION_NAME'].replace("_", "")
            print "... read HRW data for region ", region_name
            LOG.info("... read HRW data for region "+region_name)
            sat_ID = GP_IDs[int(hf.attrs["GP_SC_ID"])]
            print "... derived from Meteosat ", sat_ID
            LOG.info("... derived from Meteosat "+sat_ID)

            # print('List of arrays in this file: \n', hf.keys()), len(hf.keys())

            if len(hf.keys()) == 0:
                print "*** Warning, empty file ", filename
                print ""
            else:
                for key in hf.keys():

                    if key[4:9] == "BASIC":
                        if 'read_basic_or_detailed' in locals():
                            if read_basic_or_detailed.lower() == "detailed":
                                continue
                        HRW_data = HRW_basic   # shallow copy 
                    elif key[4:12] == "DETAILED":
                        if 'read_basic_or_detailed' in locals():
                            if read_basic_or_detailed.lower() == "basic":
                                continue
                        HRW_data = HRW_detailed # shallow copy 

                    hrw_chn = dict_channel[key[len(key)-9:]]

                    if 'hrw_channels' in locals():
                        if hrw_channels != None:
                            if hrw_chn not in hrw_channels:
                                print "... "+hrw_chn+" is not in hrw_channels", hrw_channels 
                                print "    skip reading this channel" 
                                continue 

                    # read all  data 
                    channel = hf.get(key)
                    # print '... read wind vectors of channel ', channel.name, hrw_chn
                    # print  "  i    lon        lat      speed[kn] dir   pressure"
                    #for i in range(channel.len()):
                    #    print '%3d %10.7f %10.7f %7.2f %7.1f %8.1f' % (channel[i]['wind_id'], channel[i]['lon'], channel[i]['lat'], \
                    #                                                   channel[i]['wind_speed']*m_per_s_to_knots, \
                    #                                                   channel[i]['wind_direction'], channel[i]['pressure'])
                    # create string array with channel names 
                    channel_chararray = np_empty(channel.len(), dtype='|S6')
                    channel_chararray[:] = hrw_chn

                    HRW_data.channel          = np_append(HRW_data.channel         , channel_chararray              )
                    HRW_data.wind_id          = np_append(HRW_data.wind_id         , channel[:]['wind_id']          )    
                    HRW_data.prev_wind_id     = np_append(HRW_data.prev_wind_id    , channel[:]['prev_wind_id']     )    
                    HRW_data.segment_X        = np_append(HRW_data.segment_X       , channel[:]['segment_X']        )   
                    HRW_data.segment_Y        = np_append(HRW_data.segment_Y       , channel[:]['segment_Y']        )   
                    HRW_data.t_corr_method    = np_append(HRW_data.t_corr_method   , channel[:]['t_corr_method']    )   
                    HRW_data.lon              = np_append(HRW_data.lon             , channel[:]['lon']              )   
                    HRW_data.lat              = np_append(HRW_data.lat             , channel[:]['lat']              )   
                    HRW_data.dlon             = np_append(HRW_data.dlon            , channel[:]['dlon']             )  
                    HRW_data.dlat             = np_append(HRW_data.dlat            , channel[:]['dlat']             )   
                    HRW_data.pressure         = np_append(HRW_data.pressure        , channel[:]['pressure']         )   
                    HRW_data.wind_speed       = np_append(HRW_data.wind_speed      , channel[:]['wind_speed']       )   
                    HRW_data.wind_direction   = np_append(HRW_data.wind_direction  , channel[:]['wind_direction']   )  
                    HRW_data.temperature      = np_append(HRW_data.temperature     , channel[:]['temperature']      )   
                    HRW_data.conf_nwp         = np_append(HRW_data.conf_nwp        , channel[:]['conf_nwp']         )   
                    HRW_data.conf_no_nwp      = np_append(HRW_data.conf_no_nwp     , channel[:]['conf_no_nwp']      )   
                    HRW_data.t_type           = np_append(HRW_data.t_type          , channel[:]['t_type']           )  
                    HRW_data.t_level_method   = np_append(HRW_data.t_level_method  , channel[:]['t_level_method']   )  
                    HRW_data.t_winds          = np_append(HRW_data.t_winds         , channel[:]['t_winds']          ) 
                    HRW_data.t_corr_test      = np_append(HRW_data.t_corr_test     , channel[:]['t_corr_test']      )   
                    HRW_data.applied_QI       = np_append(HRW_data.applied_QI      , channel[:]['applied_QI']       )  
                    HRW_data.NWP_wind_levels  = np_append(HRW_data.NWP_wind_levels , channel[:]['NWP_wind_levels']  ) 
                    HRW_data.num_prev_winds   = np_append(HRW_data.num_prev_winds  , channel[:]['num_prev_winds']   )
                    HRW_data.orographic_index = np_append(HRW_data.orographic_index, channel[:]['orographic_index'] )
                    HRW_data.cloud_type       = np_append(HRW_data.cloud_type      , channel[:]['cloud_type']       )
                    HRW_data.wind_channel     = np_append(HRW_data.wind_channel    , channel[:]['wind_channel']     )
                    HRW_data.correlation      = np_append(HRW_data.correlation     , channel[:]['correlation']      )
                    HRW_data.pressure_error   = np_append(HRW_data.pressure_error  , channel[:]['pressure_error']   )

                # sort according to wind_id
                inds = HRW_data.wind_id.argsort()
                HRW_data.subset(inds) # changes HRW_data itself

                # sorting without conversion to numpy arrays 
                #[e for (wid,pwid) in sorted(zip(HRW_data.wind_id,HRW_data.prev_wind_id))]

    else:
        print "*** Error, no file found"
        print ""
        sat_ID = "no file"
        # but we continue the program in order to add an empty channel below 


    ## filter data according to the given optional arguments 
    #n1 = str(HRW_data.channel.size)
    #HRW_data = HRW_data.filter(**kwargs)   
    #print "    apply filters "+' ('+n1+'->'+str(HRW_data.channel.size)+')'

    chn_name="HRW"
    satscene[chn_name].HRW_basic    = HRW_basic.filter(**kwargs)     # returns new object (deepcopied and filtered)
    satscene[chn_name].HRW_detailed = HRW_detailed.filter(**kwargs)  # returns new object (deepcopied and filtered)
    satscene[chn_name].info['units'] = 'm/s'
    satscene[chn_name].info['satname'] = 'meteosat'
    satscene[chn_name].info['satnumber'] = sat_ID
    satscene[chn_name].info['instrument_name'] = 'seviri'
    satscene[chn_name].info['time'] = satscene.time_slot
    satscene[chn_name].info['is_calibrated'] = True
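A reduced sketch of the accumulation loop above: a fixed-width string array labels every wind vector of a channel, and np_append extends the running per-field arrays (stand-in values; the real fields come from the HDF5 table):

from numpy import append as np_append, empty as np_empty, array

channel_lon = array([7.1, 7.3, 8.0])  # stand-in for channel[:]['lon']
channel_chararray = np_empty(len(channel_lon), dtype='|S6')
channel_chararray[:] = 'HRV'
all_channels = np_append(array([], dtype='|S6'), channel_chararray)
all_lon = np_append(array([]), channel_lon)
print(all_channels, all_lon)  # [b'HRV' b'HRV' b'HRV'] [7.1 7.3 8. ]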
Example #18
def mutation_ols_ls_local_margin(X, y, nn, n_added_neurons, random_state):

    n_samples = y.shape[0]
    hidden_semantics = build_hidden_semantics(nn, n_added_neurons, n_samples)

    for output_index, output_neuron in enumerate(nn.output_layer):

        output_delta_y = y[:, output_index] - nn.get_predictions()[:, output_index]
        output_y = y[:, output_index]
        output_y_class_1_indices = where(output_y == 1)[0]
        #=======================================================================
        # output_y_class_1_count = output_y_class_1_indices.shape[0]
        #=======================================================================
        output_y_class_0_indices = where(output_y == 0)[0]
        #=======================================================================
        # output_y_class_0_count = output_y_class_0_indices.shape[0]
        #=======================================================================

        margin = 0.25

        class_1_outliers_indices = where(
            output_delta_y[output_y_class_1_indices] < margin)[0]
        class_0_outliers_indices = where(
            output_delta_y[output_y_class_0_indices] > -margin)[0]
        #===============================================================
        # outliers_count = class_1_outliers_indices.shape[0] + class_0_outliers_indices.shape[0]
        # inliers_count = n_samples - outliers_count
        #===============================================================

        class_1_inliers_indices = where(
            output_delta_y[output_y_class_1_indices] >= margin)[0]
        class_0_inliers_indices = where(
            output_delta_y[output_y_class_0_indices] <= -margin)[0]
        inliers_count = (class_1_inliers_indices.shape[0] +
                         class_0_inliers_indices.shape[0])
        outliers_count = n_samples - inliers_count

        sample_weights = ones(n_samples)
        if inliers_count > 0 and outliers_count > 0:
            if outliers_count >= inliers_count:
                weight = outliers_count / inliers_count
            else:
                weight = inliers_count / outliers_count

            sample_weights[
                output_y_class_1_indices[class_1_inliers_indices]] = weight
            sample_weights[
                output_y_class_0_indices[class_0_inliers_indices]] = weight
            """ > 1 or < 0: delta of 0 """
            class_1_outliers_indices = where(
                output_delta_y[output_y_class_1_indices] < 0)[0]
            class_0_outliers_indices = where(
                output_delta_y[output_y_class_0_indices] > 0)[0]

            output_delta_y[
                output_y_class_1_indices[class_1_outliers_indices]] = 0
            output_delta_y[
                output_y_class_0_indices[class_0_outliers_indices]] = 0
        #=======================================================================
        # else:
        #     print("[Debug] Else")
        #     print()
        #=======================================================================

        reg = LinearRegression().fit(hidden_semantics, output_delta_y,
                                     sample_weights)
        optimal_weights = np_append(reg.coef_.T, reg.intercept_)
        #=======================================================================
        # print('\n\toptimal_weights [min, mean, max]: [%.5f, %.5f, %.5f]' % (optimal_weights.min(), optimal_weights.mean(), optimal_weights.max()))
        #=======================================================================

        # Update connections with the learning step value:
        for i in range(n_added_neurons):
            output_neuron.input_connections[-n_added_neurons + i].weight = optimal_weights[i]

        output_neuron.increment_bias(optimal_weights[-1])
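A toy sketch of the margin rule above: class-1 samples whose residual is still at least margin count as inliers and keep the balancing weight (made-up residuals):

from numpy import array, where

output_delta_y = array([0.9, 0.1, -0.6, -0.1])  # residuals y - y_hat
output_y_class_1_indices = array([0, 1])  # indices where y == 1
margin = 0.25
inliers = where(output_delta_y[output_y_class_1_indices] >= margin)[0]
print(output_y_class_1_indices[inliers])  # [0] -> only the first class-1 sample is an inlier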
Example #19
def updateAlgoData():
    """
    Update from raw data into FPs directly used by location.fixPosWLAN() from WppDB(wpp_clusterid, wpp_cfps).
    1) Retrieve latest incremental rawdata(csv) from remote FTP server(hosted by FPP).
    2) Decompress bzip2, import CSV into wpp_uprecsinfo with its ver_uprecs, Update ver_uprecs in wpp_uprecsver.
    3) Incr clustering inserted rawdata for direct algo use.
    """
    dbips = DB_OFFLINE
    for dbip in dbips:
        dbsvr = dbsvrs[dbip]
        wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
        ver_wpp = wppdb.getRawdataVersion()
        # Sync rawdata into wpp_uprecsinfo from remote FTP server.
        print 'Probing rawdata version > [%s]' % ver_wpp
        vers_fpp, localbzs = syncFtpUprecs(FTPCFG, ver_wpp)
        if not vers_fpp:
            print 'Not found!'
            continue
        else:
            print 'Found new vers: %s' % vers_fpp
        # Handle each bzip2 file.
        alerts = {'vers': [], 'details': ''}
        tab_rd = 'wpp_uprecsinfo'
        for bzfile in localbzs:
            # Filter out the ver_uprecs info from the name of each bzip file.
            ver_bzfile = bzfile.split('_')[-1].split('.')[0]
            # Update ver_uprecs in wpp_uprecsver to ver_bzfile.
            wppdb.setRawdataVersion(ver_bzfile)
            print '%s\nUpdate ver_uprecs -> [%s]' % ('-' * 40, ver_bzfile)
            # Decompress bzip2.
            sys.stdout.write('Decompress & append rawdata ... ')
            csvdat = csv.reader(BZ2File(bzfile))
            try:
                indat = np_array([line for line in csvdat])
            except csv.Error, e:
                sys.exit('\n\nERROR: %s, line %d: %s!\n' %
                         (bzfile, csvdat.line_num, e))
            # Append ver_uprecs(auto-incr),area_ok(0),area_try(0) to raw 16-col fp.
            append_info = np_array([[ver_bzfile, 0, 0]
                                    for i in xrange(len(indat))])
            indat_withvers = np_append(indat, append_info, axis=1).tolist()
            print 'Done'
            # Import csv into wpp_uprecsinfo.
            try:
                sys.stdout.write('Import rawdata: ')
                wppdb.insertMany(table_name=tab_rd,
                                 indat=indat_withvers,
                                 verb=True)
            except Exception, e:
                _lineno = sys._getframe().f_lineno
                _file = sys._getframe().f_code.co_filename
                alerts['details'] += '\n[ver:%s][%s:%s]: %s' % \
                        (ver_bzfile, _file, _lineno, str(e).replace('\n', ' '))
                alerts['vers'].append(ver_bzfile)
                print 'ERROR: Insert Rawdata Failed!'
                continue
            # Incr clustering.
            # file described by fd_csv contains all *location enabled* rawdata from wpp_uprecsinfo.
            strWhere = 'WHERE lat!=0 and lon!=0 and ver_uprecs=%s' % ver_bzfile
            cols_ignored = 3  # 3 status cols to be ignored during clustering: ver_uprecs,area_ok,area_try.
            cols_select = ','.join(wppdb.tbl_field[tab_rd][:-cols_ignored])
            sql = wppdb.sqls['SQL_SELECT'] % (cols_select, '%s %s' %
                                              (tab_rd, strWhere))
            rdata_loc = wppdb.execute(sql=sql, fetch_one=False)
            if not rdata_loc: continue  # NO FPs has location info.
            str_rdata_loc = '\n'.join(
                [','.join([str(col) for col in fp]) for fp in rdata_loc])
            fd_csv = StringIO(str_rdata_loc)
            print 'FPs for Incr clustering selected & ready'
            n_inserts = doClusterIncr(fd_csv=fd_csv, wppdb=wppdb, verb=False)
            print 'AlgoData added: [%s] clusters, [%s] FPs' % (
                n_inserts['n_newcids'], n_inserts['n_newfps'])
Example #20
    def run(self, scaffold_stats, num_clusters, num_components, K, no_coverage, no_pca, iterations, genome_file, output_dir):
        """Calculate statistics for genomes.

        Parameters
        ----------
        scaffold_stats : ScaffoldStats
            Statistics for individual scaffolds.
        num_clusters : int
            Number of cluster to form.
        num_components : int
            Number of PCA components to consider.
        K : int
            K-mer size to use for calculating genomic signature.
        no_coverage : boolean
            Flag indicating if coverage information should be used during clustering.
        no_pca : boolean
            Flag indicating if PCA of genomic signature should be calculated.
        iterations : int
            Iterations of clustering to perform.
        genome_file : str
            Sequences being clustered.
        output_dir : str
            Directory to write results.
        """

        # get GC and mean coverage for each scaffold in genome
        self.logger.info('')
        self.logger.info('  Determining mean coverage and genomic signatures.')
        signatures = GenomicSignature(K)
        genome_stats = []
        signature_matrix = []
        seqs = seq_io.read(genome_file)
        for seq_id, seq in seqs.iteritems():
            stats = scaffold_stats.stats[seq_id]

            if not no_coverage:
                genome_stats.append((np_mean(stats.coverage)))
            else:
                genome_stats.append(())

            if K == 0:
                pass
            elif K == 4:
                signature_matrix.append(stats.signature)
            else:
                sig = signatures.seq_signature(seq)
                total_kmers = sum(sig)
                for i in xrange(0, len(sig)):
                    sig[i] = float(sig[i]) / total_kmers
                signature_matrix.append(sig)

        # calculate PCA of tetranucleotide signatures
        if K != 0:
            if not no_pca:
                self.logger.info('  Calculating PCA of genomic signatures.')
                pc, variance = self.pca(signature_matrix)
                self.logger.info('    First %d PCs capture %.1f%% of the variance.' % (num_components, sum(variance[0:num_components]) * 100))
    
                for i, stats in enumerate(genome_stats):
                    genome_stats[i] = np_append(stats, pc[i][0:num_components])
            else:
                self.logger.info('  Using complete genomic signature.')
                for i, stats in enumerate(genome_stats):
                    genome_stats[i] = np_append(stats, signature_matrix[i])

        # whiten data if feature matrix contains coverage and genomic signature data
        if not no_coverage and K != 0:
            print '  Whitening data.'
            genome_stats = whiten(genome_stats)
        else:
            genome_stats = np_array(genome_stats)

        # cluster
        self.logger.info('  Partitioning genome into %d clusters.' % num_clusters)

        bError = True
        while bError:
            try:
                bError = False
                _centroids, labels = kmeans2(genome_stats, num_clusters, iterations, minit='points', missing='raise')
            except ClusterError:
                bError = True

        for k in range(num_clusters):
            self.logger.info('    Placed %d sequences in cluster %d.' % (sum(labels == k), (k + 1)))

        # write out clusters
        genome_id = remove_extension(genome_file)
        for k in range(num_clusters):
            fout = open(os.path.join(output_dir, genome_id + '_c%d' % (k + 1) + '.fna'), 'w')
            for i in np_where(labels == k)[0]:
                seq_id = seqs.keys()[i]
                fout.write('>' + seq_id + '\n')
                fout.write(seqs[seq_id] + '\n')
            fout.close()
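The feature assembly above in miniature: each genome's coverage scalar is extended with its leading principal components via np_append (hypothetical numbers):

from numpy import append as np_append, array

coverage = 23.7
pc_row = array([0.12, -0.08, 0.55])
num_components = 2
features = np_append(coverage, pc_row[0:num_components])
print(features)  # [23.7   0.12 -0.08]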
Example #21
    def __add__(self, HRW_class2):

        HRW_new = HRW_class()

        HRW_new.date = self.date  # !!! does not make sense !!!
        HRW_new.detailed = self.detailed  # !!! does not make sense !!!
        HRW_new.channel = np_append(self.channel, HRW_class2.channel)
        HRW_new.wind_id = np_append(self.wind_id, HRW_class2.wind_id)
        HRW_new.prev_wind_id = np_append(self.prev_wind_id,
                                         HRW_class2.prev_wind_id)
        HRW_new.segment_X = np_append(self.segment_X, HRW_class2.segment_X)
        HRW_new.segment_Y = np_append(self.segment_Y, HRW_class2.segment_Y)
        HRW_new.t_corr_method = np_append(self.t_corr_method,
                                          HRW_class2.t_corr_method)
        HRW_new.lon = np_append(self.lon, HRW_class2.lon)
        HRW_new.lat = np_append(self.lat, HRW_class2.lat)
        HRW_new.dlon = np_append(self.dlon, HRW_class2.dlon)
        HRW_new.dlat = np_append(self.dlat, HRW_class2.dlat)
        HRW_new.pressure = np_append(self.pressure, HRW_class2.pressure)
        HRW_new.wind_speed = np_append(self.wind_speed, HRW_class2.wind_speed)
        HRW_new.wind_direction = np_append(self.wind_direction,
                                           HRW_class2.wind_direction)
        HRW_new.temperature = np_append(self.temperature,
                                        HRW_class2.temperature)
        HRW_new.conf_nwp = np_append(self.conf_nwp, HRW_class2.conf_nwp)
        HRW_new.conf_no_nwp = np_append(self.conf_no_nwp,
                                        HRW_class2.conf_no_nwp)
        HRW_new.t_type = np_append(self.t_type, HRW_class2.t_type)
        HRW_new.t_level_method = np_append(self.t_level_method,
                                           HRW_class2.t_level_method)
        HRW_new.t_winds = np_append(self.t_winds, HRW_class2.t_winds)
        HRW_new.t_corr_test = np_append(self.t_corr_test,
                                        HRW_class2.t_corr_test)
        HRW_new.applied_QI = np_append(self.applied_QI, HRW_class2.applied_QI)
        HRW_new.NWP_wind_levels = np_append(self.NWP_wind_levels,
                                            HRW_class2.NWP_wind_levels)
        HRW_new.num_prev_winds = np_append(self.num_prev_winds,
                                           HRW_class2.num_prev_winds)
        HRW_new.orographic_index = np_append(self.orographic_index,
                                             HRW_class2.orographic_index)
        HRW_new.cloud_type = np_append(self.cloud_type, HRW_class2.cloud_type)
        HRW_new.wind_channel = np_append(self.wind_channel,
                                         HRW_class2.wind_channel)
        HRW_new.correlation = np_append(self.correlation,
                                        HRW_class2.correlation)
        HRW_new.pressure_error = np_append(self.pressure_error,
                                           HRW_class2.pressure_error)

        return HRW_new
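The merge pattern above reduces to plain concatenation: np_append on two 1-D arrays places the second operand's values after the first's (toy ids):

from numpy import append as np_append, array

wind_id_a = array([1, 2, 3])
wind_id_b = array([4, 5])
print(np_append(wind_id_a, wind_id_b))  # [1 2 3 4 5]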
Example #22
    def kmeans(self, scaffold_stats, num_clusters, num_components, K,
               no_coverage, no_pca, iterations, genome_file, output_dir):
        """Cluster genome with k-means.

        Parameters
        ----------
        scaffold_stats : ScaffoldStats
            Statistics for individual scaffolds.
        num_clusters : int
            Number of clusters to form.
        num_components : int
            Number of PCA components to consider.
        K : int
            K-mer size to use for calculating genomic signature.
        no_coverage : boolean
            Flag indicating if coverage information should be used during clustering.
        no_pca : boolean
            Flag indicating if PCA of genomic signature should be calculated.
        iterations : int
            Iterations of clustering to perform.
        genome_file : str
            Sequences being clustered.
        output_dir : str
            Directory to write results.
        """

        # get GC and mean coverage for each scaffold in genome
        self.logger.info('Determining mean coverage and genomic signatures.')
        signatures = GenomicSignature(K)
        genome_stats = []
        signature_matrix = []
        seqs = seq_io.read(genome_file)
        for seq_id, seq in seqs.items():
            stats = scaffold_stats.stats[seq_id]

            if not no_coverage:
                genome_stats.append((np_mean(stats.coverage)))
            else:
                genome_stats.append(())

            if K == 0:
                pass
            elif K == 4:
                signature_matrix.append(stats.signature)
            else:
                sig = signatures.seq_signature(seq)
                total_kmers = sum(sig)
                for i in range(0, len(sig)):
                    sig[i] = float(sig[i]) / total_kmers
                signature_matrix.append(sig)

        # calculate PCA of signatures
        if K != 0:
            if not no_pca:
                self.logger.info('Calculating PCA of genomic signatures.')
                pc, variance = self.pca(signature_matrix)
                self.logger.info(
                    'First {:,} PCs capture {:.1f}% of the variance.'.format(
                        num_components,
                        sum(variance[0:num_components]) * 100))

                for i, stats in enumerate(genome_stats):
                    genome_stats[i] = np_append(stats, pc[i][0:num_components])
            else:
                self.logger.info('Using complete genomic signature.')
                for i, stats in enumerate(genome_stats):
                    genome_stats[i] = np_append(stats, signature_matrix[i])

        # whiten data if feature matrix contains coverage and genomic signature data
        if not no_coverage and K != 0:
            self.logger.info('Whitening data.')
            genome_stats = whiten(genome_stats)
        else:
            genome_stats = np_array(genome_stats)

        # cluster
        self.logger.info(
            'Partitioning genome into {:,} clusters.'.format(num_clusters))

        bError = True
        while bError:
            try:
                bError = False
                _centroids, labels = kmeans2(genome_stats,
                                             num_clusters,
                                             iterations,
                                             minit='points',
                                             missing='raise')
            except ClusterError:
                bError = True

        for k in range(num_clusters):
            self.logger.info('Placed {:,} sequences in cluster {:,}.'.format(
                sum(labels == k), (k + 1)))

        # write out clusters
        genome_id = remove_extension(genome_file)
        for k in range(num_clusters):
            fout = open(
                os.path.join(output_dir,
                             genome_id + '_c%d' % (k + 1) + '.fna'), 'w')
            for i in np_where(labels == k)[0]:
                seq_id = list(seqs.keys())[i]
                fout.write('>' + seq_id + '\n')
                fout.write(seqs[seq_id] + '\n')
            fout.close()
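A standalone sketch of the clustering step above on random features, including the same retry-on-empty-cluster loop (synthetic data, scipy assumed available):

from numpy.random import default_rng
from scipy.cluster.vq import whiten, kmeans2, ClusterError

genome_stats = default_rng(0).normal(size=(30, 4))
while True:
    try:
        _centroids, labels = kmeans2(whiten(genome_stats), 3, 10,
                                     minit='points', missing='raise')
        break
    except ClusterError:
        pass  # re-draw initial points until no cluster comes up empty
print(labels.shape)  # (30,)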
Example #23
    def predict_sigle_position(self, sent, pos, prev2_label, prev_label):

        flag = False

        feature_vec, feature_index_list = self.gen_feature(
            sent, pos, prev2_label, prev_label)

        if self.train_mode and self.drop_out:

            to_block = set(
                permutation(arange(self.non_fixed_param))[:self.dropout_size])
            #print 'to_block',list(to_block)[:10]
            block = array(
                [0 if zzz in to_block else 1 for zzz in range(self.pred_size)])

            feature_vec = multiply(feature_vec, block)

        elif self.drop_out:  # for dropout mode at testing time...
            feature_vec = (1 - self.dropout_rate) * feature_vec
            block = None

        else:
            block = None

        if block is not None:
            print 'block=', block

        if flag:
            print 'pos, char=', pos, sent[pos]
            print 'feat_index_list=', feature_index_list, ';features are:', ' '.join(
                [self.index2word[ind] for ind in feature_index_list])

        c0 = sent[pos] if pos < len(sent) else self.END

        pred_tuple = tuple(
            [self.su_prefix + varient + c0 for varient in self.state_varient])
        if pred_tuple[0] in self.vocab and pred_tuple[1] in self.vocab:
            pass
        else:
            pred_tuple = None
            if self.train_mode:
                print 'Unknown candidate! Should NOT happen during training!'
                assert False

        pred_tuple2 = tuple([self.label0_as_vocab, self.label1_as_vocab])

        softmax_score = None
        if pred_tuple:
            pred_index_list = [self.vocab[pred].index for pred in pred_tuple]
            pred_matrix = self.syn1neg[pred_index_list]

            if block is not None:
                pred_matrix = multiply(block, pred_matrix)

            elif self.drop_out:
                pred_matrix = (1 - self.dropout_rate) * pred_matrix

            raw_score = exp(dot(feature_vec, pred_matrix.T))
            softmax_score = raw_score / sum(raw_score)

        pred_index_list2 = [self.vocab[pred].index for pred in pred_tuple2]
        pred_matrix2 = self.syn1neg[pred_index_list2]

        if block is not None:
            pred_matrix2 = multiply(block, pred_matrix2)
        elif self.drop_out:
            pred_matrix2 = (1 - self.dropout_rate) * pred_matrix2

        raw_score2 = exp(dot(feature_vec, pred_matrix2.T))
        softmax_score2 = raw_score2 / sum(raw_score2)
        #print pred_matrix2.shape, pred_matrix.shape
        if pred_tuple:
            softmax_score2 = np_append(softmax_score2, softmax_score)
            pred_index_list2.extend(pred_index_list)
            pred_matrix2 = np_append(pred_matrix2, pred_matrix, axis=0)
            #print pred_matrix2.shape, pred_matrix.shape

        if flag:
            print 'pred index and item=', pred_index_list2, ' '.join(
                [self.index2word[ind] for ind in pred_index_list2])

        return softmax_score2, feature_index_list, pred_index_list2, feature_vec, pred_matrix2
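The final combination above, sketched with toy shapes: score vectors are concatenated flat, while the prediction matrices are stacked row-wise with axis=0:

from numpy import append as np_append
from numpy.random import default_rng

rng = default_rng(0)
softmax_score2, softmax_score = rng.random(2), rng.random(2)
pred_matrix2, pred_matrix = rng.random((2, 5)), rng.random((2, 5))
scores = np_append(softmax_score2, softmax_score)  # shape (4,)
matrices = np_append(pred_matrix2, pred_matrix, axis=0)  # shape (4, 5)
print(scores.shape, matrices.shape)  # (4,) (4, 5)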
Example #24
    def renderTransCPData(self,
                          fileName="",
                          show=True,
                          elev=45,
                          azim=45,
                          all=False,
                          showAxis=False,
                          primaryWidth=12,
                          primarySpace=3,
                          dpi=300,
                          format='png',
                          fig=None,
                          highlight=None,
                          restrictedBids=[],
                          alpha=1,
                          ignoreContigLengths=False):
        """Plot transformed data in 3D"""
        del_fig = False
        if (fig is None):
            fig = plt.figure()
            del_fig = True
        else:
            plt.clf()
        if (all):
            myAXINFO = {
                'x': {
                    'i': 0,
                    'tickdir': 1,
                    'juggled': (1, 0, 2),
                    'color': (0, 0, 0, 0, 0)
                },
                'y': {
                    'i': 1,
                    'tickdir': 0,
                    'juggled': (0, 1, 2),
                    'color': (0, 0, 0, 0, 0)
                },
                'z': {
                    'i': 2,
                    'tickdir': 0,
                    'juggled': (0, 2, 1),
                    'color': (0, 0, 0, 0, 0)
                },
            }

            ax = fig.add_subplot(131, projection='3d')
            sc = ax.scatter(self.transformedCP[:, 0],
                            self.transformedCP[:, 1],
                            self.transformedCP[:, 2],
                            edgecolors='k',
                            c=self.contigGCs,
                            cmap=self.colorMapGC,
                            vmin=0.0,
                            vmax=1.0,
                            marker='.')
            sc.set_edgecolors = sc.set_facecolors = lambda *args: None  # disable depth transparency effect
            ax.azim = 0
            ax.elev = 0
            ax.set_xlim3d(0, self.scaleFactor)
            ax.set_ylim3d(0, self.scaleFactor)
            ax.set_zlim3d(0, self.scaleFactor)
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_zticklabels([])
            ax.set_xticks([])
            ax.set_yticks([])
            ax.set_zticks([])
            for axis in ax.w_xaxis, ax.w_yaxis, ax.w_zaxis:
                for elt in axis.get_ticklines() + axis.get_ticklabels():
                    elt.set_visible(False)
            ax.w_xaxis._AXINFO = myAXINFO
            ax.w_yaxis._AXINFO = myAXINFO
            ax.w_zaxis._AXINFO = myAXINFO

            ax = fig.add_subplot(132, projection='3d')
            sc = ax.scatter(self.transformedCP[:, 0],
                            self.transformedCP[:, 1],
                            self.transformedCP[:, 2],
                            edgecolors='k',
                            c=self.contigGCs,
                            cmap=self.colorMapGC,
                            vmin=0.0,
                            vmax=1.0,
                            marker='.')
            sc.set_edgecolors = sc.set_facecolors = lambda *args: None  # disable depth transparency effect
            ax.azim = 90
            ax.elev = 0
            ax.set_xlim3d(0, self.scaleFactor)
            ax.set_ylim3d(0, self.scaleFactor)
            ax.set_zlim3d(0, self.scaleFactor)
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_zticklabels([])
            ax.set_xticks([])
            ax.set_yticks([])
            ax.set_zticks([])
            for axis in ax.w_xaxis, ax.w_yaxis, ax.w_zaxis:
                for elt in axis.get_ticklines() + axis.get_ticklabels():
                    elt.set_visible(False)
            ax.w_xaxis._AXINFO = myAXINFO
            ax.w_yaxis._AXINFO = myAXINFO
            ax.w_zaxis._AXINFO = myAXINFO

            ax = fig.add_subplot(133, projection='3d')
            sc = ax.scatter(self.transformedCP[:, 0],
                            self.transformedCP[:, 1],
                            self.transformedCP[:, 2],
                            edgecolors='k',
                            c=self.contigGCs,
                            cmap=self.colorMapGC,
                            vmin=0.0,
                            vmax=1.0,
                            marker='.')
            sc.set_edgecolors = sc.set_facecolors = lambda *args: None  # disable depth transparency effect
            ax.azim = 0
            ax.elev = 90
            ax.set_xlim3d(0, self.scaleFactor)
            ax.set_ylim3d(0, self.scaleFactor)
            ax.set_zlim3d(0, self.scaleFactor)
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_zticklabels([])
            ax.set_xticks([])
            ax.set_yticks([])
            ax.set_zticks([])
            for axis in ax.w_xaxis, ax.w_yaxis, ax.w_zaxis:
                for elt in axis.get_ticklines() + axis.get_ticklabels():
                    elt.set_visible(False)
            ax.w_xaxis._AXINFO = myAXINFO
            ax.w_yaxis._AXINFO = myAXINFO
            ax.w_zaxis._AXINFO = myAXINFO
        else:
            ax = fig.add_subplot(111, projection='3d')
            if len(restrictedBids) == 0:
                if highlight is None:
                    print("BF:", np_shape(self.transformedCP))
                    if ignoreContigLengths:
                        sc = ax.scatter(self.transformedCP[:, 0],
                                        self.transformedCP[:, 1],
                                        self.transformedCP[:, 2],
                                        edgecolors='none',
                                        c=self.contigGCs,
                                        cmap=self.colorMapGC,
                                        s=10.,
                                        vmin=0.0,
                                        vmax=1.0,
                                        marker='.')
                    else:
                        sc = ax.scatter(self.transformedCP[:, 0],
                                        self.transformedCP[:, 1],
                                        self.transformedCP[:, 2],
                                        edgecolors='none',
                                        c=self.contigGCs,
                                        cmap=self.colorMapGC,
                                        vmin=0.0,
                                        vmax=1.0,
                                        s=np_sqrt(self.contigLengths),
                                        marker='.')
                    sc.set_edgecolors = sc.set_facecolors = lambda *args: None  # disable depth transparency effect
                else:
                    #draw the opaque guys first
                    """
                    sc = ax.scatter(self.transformedCP[:,0],
                                    self.transformedCP[:,1],
                                    self.transformedCP[:,2],
                                    edgecolors='none',
                                    c=self.contigGCs,
                                    cmap=self.colorMapGC,
                                    vmin=0.0,
                                    vmax=1.0,
                                    s=100.,
                                    marker='s',
                                    alpha=alpha)
                    sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect
                    """
                    # now replot the highlighted guys
                    disp_vals = np_array([])
                    disp_GCs = np_array([])

                    thrower = {}
                    hide_vals = np_array([])
                    hide_GCs = np_array([])

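                    # Split the rows in two passes: rows belonging to the
                    # highlighted bins go into disp_vals and are drawn last as
                    # small dots; every other row index falls through to
                    # hide_vals and is drawn first as large, translucent
                    # squares behind them.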
                    num_points = 0
                    for bin in highlight:
                        for row_index in bin.rowIndices:
                            num_points += 1
                            disp_vals = np_append(
                                disp_vals, self.transformedCP[row_index])
                            disp_GCs = np_append(disp_GCs,
                                                 self.contigGCs[row_index])
                            thrower[row_index] = False
                    # reshape
                    disp_vals = np_reshape(disp_vals, (num_points, 3))

                    num_points = 0
                    for i in range(len(self.indices)):
                        try:
                            thrower[i]
                        except KeyError:
                            num_points += 1
                            hide_vals = np_append(hide_vals,
                                                  self.transformedCP[i])
                            hide_GCs = np_append(hide_GCs, self.contigGCs[i])
                    # reshape
                    hide_vals = np_reshape(hide_vals, (num_points, 3))

                    sc = ax.scatter(hide_vals[:, 0],
                                    hide_vals[:, 1],
                                    hide_vals[:, 2],
                                    edgecolors='none',
                                    c=hide_GCs,
                                    cmap=self.colorMapGC,
                                    vmin=0.0,
                                    vmax=1.0,
                                    s=100.,
                                    marker='s',
                                    alpha=alpha)
                    sc.set_edgecolors = sc.set_facecolors = lambda *args: None  # disable depth transparency effect

                    sc = ax.scatter(disp_vals[:, 0],
                                    disp_vals[:, 1],
                                    disp_vals[:, 2],
                                    edgecolors='none',
                                    c=disp_GCs,
                                    cmap=self.colorMapGC,
                                    vmin=0.0,
                                    vmax=1.0,
                                    s=10.,
                                    marker='.')
                    sc.set_edgecolors = sc.set_facecolors = lambda *args: None  # disable depth transparency effect

                    print(np_shape(disp_vals), np_shape(hide_vals),
                          np_shape(self.transformedCP))

                # render color bar
                cbar = plt.colorbar(sc, shrink=0.5)
                cbar.ax.tick_params()
                cbar.ax.set_title("% GC", size=10)
                cbar.set_ticks([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
                cbar.ax.set_ylim([0.15, 0.85])
                mungeCbar(cbar)
            else:
                r_trans = np_array([])
                r_cols = np_array([])
                num_added = 0
                for i in range(len(self.indices)):
                    if self.binIds[i] not in restrictedBids:
                        r_trans = np_append(r_trans, self.transformedCP[i])
                        r_cols = np_append(r_cols, self.contigGCs[i])
                        num_added += 1
                r_trans = np_reshape(r_trans, (num_added, 3))
                print(np_shape(r_trans))
                #r_cols = np_reshape(r_cols, (num_added,3))
                sc = ax.scatter(r_trans[:, 0],
                                r_trans[:, 1],
                                r_trans[:, 2],
                                edgecolors='none',
                                c=r_cols,
                                cmap=self.colorMapGC,
                                s=10.,
                                vmin=0.0,
                                vmax=1.0,
                                marker='.')
                sc.set_edgecolors = sc.set_facecolors = lambda *args: None  # disable depth transparency effect

                # render color bar
                cbar = plt.colorbar(sc, shrink=0.5)
                cbar.ax.tick_params()
                cbar.ax.set_title("% GC", size=10)
                cbar.set_ticks([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
                cbar.ax.set_ylim([0.15, 0.85])
                mungeCbar(cbar)

            ax.azim = azim
            ax.elev = elev
            ax.set_xlim3d(0, self.scaleFactor)
            ax.set_ylim3d(0, self.scaleFactor)
            ax.set_zlim3d(0, self.scaleFactor)
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_zticklabels([])
            ax.set_xticks([])
            ax.set_yticks([])
            ax.set_zticks([])
            if (not showAxis):
                ax.set_axis_off()

        if (fileName != ""):
            try:
                if (all):
                    fig.set_size_inches(3 * primaryWidth + 2 * primarySpace,
                                        primaryWidth)
                else:
                    fig.set_size_inches(primaryWidth, primaryWidth)
                plt.savefig(fileName, dpi=dpi, format=format)
            except:
                print("Error saving image", fileName, exc_info()[0])
                raise
        elif (show):
            try:
                plt.show()
            except:
                print("Error showing image", exc_info()[0])
                raise
        if del_fig:
            plt.close(fig)
            del fig
Example #26
0
    def plotRegion(self, px, py, pz, fileName="", tag="", column=False, show=True):
        """Plot the region surrounding a point """
        import matplotlib as mpl

        disp_vals = np_array([])
        disp_cols = np_array([])
        num_points = 0
        # plot all points within span
        (z_lower, z_upper) = self.makeCoordRanges(pz, self.span)
        if column:
            z_lower = 0
            z_upper = self.PM.scaleFactor - 1

        (x_lower, x_upper) = self.makeCoordRanges(px, self.span)
        (y_lower, y_upper) = self.makeCoordRanges(py, self.span)
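        # realz flips the z index (scaleFactor - z - 1), which apparently
        # matches how the voxel grid behind im2RowIndicies is indexed from
        # the opposite end of the z axis.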
        for z in range(z_lower, z_upper):
            realz = self.PM.scaleFactor - z - 1
            for x in range(x_lower, x_upper):
                for y in range(y_lower, y_upper):
                    if (x, y, realz) in self.im2RowIndicies:
                        for row_index in self.im2RowIndicies[(x, y, realz)]:
                            if (
                                row_index not in self.PM.binnedRowIndicies
                                and row_index not in self.PM.restrictedRowIndicies
                            ):
                                num_points += 1
                                disp_vals = np_append(disp_vals, self.PM.transformedCP[row_index])
                                disp_cols = np_append(disp_cols, self.PM.contigColours[row_index])

        # make a black mark at the max values
        small_span = self.span / 2
        (x_lower, x_upper) = self.makeCoordRanges(px, small_span)
        (y_lower, y_upper) = self.makeCoordRanges(py, small_span)
        (z_lower, z_upper) = self.makeCoordRanges(pz, small_span)
        for z in range(z_lower, z_upper):
            realz = self.PM.scaleFactor - z - 1
            for x in range(x_lower, x_upper):
                for y in range(y_lower, y_upper):
                    if (x, y, realz) in self.im2RowIndicies:
                        for row_index in self.im2RowIndicies[(x, y, realz)]:
                            if (
                                row_index not in self.PM.binnedRowIndicies
                                and row_index not in self.PM.restrictedRowIndicies
                            ):
                                num_points += 1
                                disp_vals = np_append(disp_vals, self.PM.transformedCP[row_index])
                                disp_cols = np_append(disp_cols, htr(0, 0, 0))
        # reshape
        disp_vals = np_reshape(disp_vals, (num_points, 3))
        disp_cols = np_reshape(disp_cols, (num_points, 3))

        fig = plt.figure()
        ax = fig.add_subplot(111, projection="3d")
        cm = mpl.colors.LinearSegmentedColormap.from_list("my_colormap", disp_cols, 1024)
        result = ax.scatter(
            disp_vals[:, 0], disp_vals[:, 1], disp_vals[:, 2], edgecolors=disp_cols, c=disp_cols, cmap=cm, marker="."
        )
        title = str.join(" ", ["Focus at: (", str(px), str(py), str(self.PM.scaleFactor - pz - 1), ")\n", tag])
        plt.title(title)

        if fileName != "":
            fig.set_size_inches(6, 6)
            plt.savefig(fileName, dpi=300)
        elif show:
            plt.show()

        plt.close(fig)
        del fig
Example #27
0
    def findArrayCenter(self, vals):
        """Find the center of the numpy array vals, return the index of the center"""
        # parameters
        current_val_max = -1
        delta = 0
        bounce_amount = 0.1
        height = 0
        last_val = 0

        working = np_array([])
        final_index = -1

        # sort and normalise between 0 -> 1
        sorted_indices = np_argsort(vals)
        vals_sorted = np_array([vals[i] for i in sorted_indices])
        vals_sorted -= vals_sorted[0]
        if vals_sorted[-1] != 0:
            vals_sorted /= vals_sorted[-1]

        # print vals_sorted

        # run through in one direction
        for val in vals_sorted:
            # calculate delta
            delta = val - last_val
            # reduce the current value according to the delta value
            height = self.reduceViaDelta(height, bounce_amount, delta)
            # bounce the ball up
            height += bounce_amount

            # store the height
            working = np_append(working, height)
            final_index += 1

            # save the last val
            last_val = val

        current_val_max = -1
        height = 0
        last_val = 0

        # print "===W==="
        # print working
        # print "===E==="

        # run through in the reverse direction
        vals_sorted = vals_sorted[::-1]
        for val in vals_sorted:
            if last_val == 0:
                delta = 0
            else:
                delta = last_val - val
            height = self.reduceViaDelta(height, bounce_amount, delta)
            height += bounce_amount
            # add to the old heights
            working[final_index] += height
            final_index -= 1
            last_val = val

        # print working
        # print "==EEE=="

        # find the original index!
        return sorted_indices[np_argmax(working)]
Example #28
0
    def plot_coverage(self,
            plt_figure_title: str,
            title: str = "Coverage of {method_name}\nsamples: n1 = {sample_size1}, n2 = {sample_size2}",
            theme: plot_styles = "default",
            colors: Tuple[str,str,str,str,str] = ("gray", "purple", "white", "#b8df96", "green")
            ):
        """
        Plots the `matplotlib.pyplot` figure given the data from previous coverage calculation and
        some captions and formatting.
        """
        if self.coverage is None:
            raise NoCoverageException(
                "you have to calculate coverage first before plotting it")

        # this defaultdict-unpacking trick makes all formatting placeholders optional
        title = title.format(**defaultdict(str, 
            method_name  = self.method_name,
            sample_size1 = self.sample_size1,
            sample_size2 = self.sample_size2
        ))


        plt.style.use(theme)

        """
        Colorbar range depends on confidence level.
        Sets vmin to a point 10 times farther from 100% than the confidence

        for confidence=95% show colorbar from 50% to 100%;
        for confidence=99% show colorbar from 90% to 100%;
        for confidence=99.9% show colorbar from 99% to 100%;
        """
        vmin = 100 - ((100 - self.confidence * 100) * 10)
        vmax = 100
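        # Worked examples of the formula above (with confidence given as a
        # fraction): confidence=0.95 -> vmin = 100 - (5*10) = 50;
        # confidence=0.99 -> vmin = 100 - (1*10) = 90.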

        """
        In LinearSegmentedColormap specified color points have to span from 0 to 1,
        where 0 would correspond to vmin, and 1 to vmax.

        the 5 specified colors will form a gradient by marking at points below.
        For confidence=95%: (50, 90, 95, 97.5, 100)
        For confidence=99%: (90, 98, 99, 99.5, 100)

        But because the node positions below are constant, the colorbar itself
        will always show the same visual gradient, regardless of the given
        `confidence` value.
        """
        nodes = (0.0, 0.8, 0.9, 0.95, 1.0)
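        # For example, with confidence=0.95 (vmin=50, vmax=100) node 0.8 lands
        # at 50 + 0.8*(100-50) = 90%, node 0.9 at 95% and node 0.95 at 97.5%,
        # matching the marks listed in the comment above.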

        cmap = LinearSegmentedColormap.from_list("", list(zip(nodes, colors)))
        cmap.set_under(colors[0])

        fig, ax = plt.subplots()
        fig.canvas.set_window_title(plt_figure_title)

        """
        Would be great if matplotlib supported float128/longdouble. Instead, it converts data
        to float64 with a warning:
            "UserWarning: Casting input data from 'float128' to 'float64' for imshow"

        But!
        float128 precision could possibly be utilized while using float64 in this case.
        If we were to display not the value (the coverage, 0-100), but the difference between
        the expected coverage (confidence) and the actual coverage, float64 would do a lot better.
        This can be done, but some adjustments have to be made to colorbar and labels.
        """
        im = ax.imshow(float64(np_array(self.coverage)),
            cmap=cmap,
            norm=Normalize(float(vmin), vmax, True)
        )

        # precision of "8" decimal places should be more than enough for colorbar ticks
        cb = fig.colorbar(im, format=ticker.FuncFormatter(lambda x, pos: ('%.8f' % x).rstrip('0').rstrip('.')))
        # plot a dashed black line over *confidence* point on a colorbar
        cb.ax.plot([0, 100], [self.confidence*100, self.confidence*100], '_:k')

        # rewriting autogenerated colorbar ticks by adding one that corresponds to `confidence`
        colorbar_ticks = cb.ax.get_yticks()
        colorbar_ticks = np_append(colorbar_ticks, float(self.confidence*100))
        cb.set_ticks(colorbar_ticks)


        plt.title(title, fontsize="large", fontweight="bold")


        # this is reasonable number of ticks so that tick labels won't overlap
        max_num_xticks = 7
        max_num_yticks = 20

        xticks_period = int(np_ceil(len(self.proportions)/max_num_xticks))
        yticks_period = int(np_ceil(len(self.proportions)/max_num_yticks))

        xperiodic_probs = [float(v) for v in self.proportions[::xticks_period]]
        yperiodic_probs = [float(v) for v in self.proportions[::yticks_period]]

        ax.xaxis.set_major_locator(ticker.MultipleLocator(xticks_period))
        ax.yaxis.set_major_locator(ticker.MultipleLocator(yticks_period))
        ax.tick_params(axis='x', labelsize=8)
        ax.tick_params(axis='y', labelsize=9)
        ax.tick_params(top=False)
        ax.tick_params(right=False)
        # ax.xaxis.set_tick_params(labeltop=False)
        # ax.yaxis.set_tick_params(labelright=False)

        # auto-calculated ticks are fine except for redundant first and last ticks
        xticks = ax.get_xticks().tolist()[1:-1]
        yticks = ax.get_yticks().tolist()[1:-1]
        ax.set_xticks(xticks)
        ax.set_yticks(yticks)
        ax.set_xticklabels(xperiodic_probs)
        ax.set_yticklabels(yperiodic_probs)


        self.figure = fig
        return self.figure
Example #29
0
    def predict_sigle_position(self, sent, pos, prev2_label, prev_label):

        flag = False

        feature_vec, feature_index_list = self.gen_feature(sent, pos, prev2_label, prev_label)

        if self.train_mode and self.drop_out:

            to_block = set(permutation(arange(self.non_fixed_param))[:self.dropout_size])
            #print 'to_block', list(to_block)[:10]
            block = array([0 if zzz in to_block else 1 for zzz in range(self.pred_size)])

            feature_vec = multiply(feature_vec, block)

        elif self.drop_out: # for dropout mode at testing time...
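            # At test time nothing is dropped; scaling by the keep probability
            # (1 - dropout_rate) keeps the expected magnitude of feature_vec
            # consistent with training, as in classic (non-inverted) dropout.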
            feature_vec = (1-self.dropout_rate) * feature_vec
            block = None

        else:
            block = None


        if flag and block is not None:
            print('block=', block)



        if flag:
            print('pos, char=', pos, sent[pos])
            print('feat_index_list=', feature_index_list, ';features are:', ' '.join([self.index2word[ind] for ind in feature_index_list]))

        c0 = sent[pos] if pos<len(sent) else self.END

        pred_tuple = tuple([self.su_prefix+varient+c0 for varient in self.state_varient])
        if pred_tuple[0] in self.vocab and pred_tuple[1] in self.vocab:
            pass
        else:
            pred_tuple = None
            if self.train_mode:
                print('Unknown candidate! Should NOT happen during training!')
                assert False

        pred_tuple2 = tuple([self.label0_as_vocab, self.label1_as_vocab])

        softmax_score = None
        if pred_tuple:
            pred_index_list = [self.vocab[pred].index for pred in pred_tuple]
            pred_matrix = self.syn1neg[pred_index_list]

            if block is not None:
                pred_matrix = multiply(block, pred_matrix)

            elif self.drop_out:
                pred_matrix = (1-self.dropout_rate) * pred_matrix



            raw_score = exp(dot(feature_vec, pred_matrix.T))
            softmax_score = raw_score / sum(raw_score)


        pred_index_list2 = [self.vocab[pred].index for pred in pred_tuple2]
        pred_matrix2 = self.syn1neg[pred_index_list2]

        if block is not None:
            pred_matrix2 = multiply(block, pred_matrix2)
        elif self.drop_out:
            pred_matrix2 = (1-self.dropout_rate) * pred_matrix2

        raw_score2 = exp(dot(feature_vec, pred_matrix2.T))
        softmax_score2 = raw_score2 / sum(raw_score2)
        #print pred_matrix2.shape, pred_matrix.shape
        if pred_tuple:
            softmax_score2 = np_append(softmax_score2, softmax_score)
            pred_index_list2.extend(pred_index_list)
            pred_matrix2 = np_append(pred_matrix2, pred_matrix, axis=0)
            #print pred_matrix2.shape, pred_matrix.shape

        if flag:
            print('pred index and item=', pred_index_list2, ' '.join([self.index2word[ind] for ind in pred_index_list2]))

        return softmax_score2, feature_index_list, pred_index_list2, feature_vec, pred_matrix2
Example #30
0
def outputCoverage(wigChrDict, bedLineL, opL, name_mode, strand, mp):
    label = ''
    valueL = np_array([])
    other = ''
    for lineL in bedLineL:
        if label and label != lineL[3]:
            #---------------get the position with maximum value-------
            if mp:
                sort_indexL = argsort(valueL)
                #print valueL
                #print sort_indexL
                maxP = sort_indexL[-1]
                #print 'ORiginal maxP:', maxP
                diff = abs(sort_indexL[-1]-mid)
                maxV = valueL[maxP]
                #print 'Total length:', length
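                # Tie-breaking: walk the sorted indices from the second-largest
                # value downwards; among positions sharing the maximum value,
                # prefer the one closest to the midpoint of the region.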
                for i in range(-2, length, -1):
                    if valueL[sort_indexL[i]] == maxV:
                        if diff > abs(sort_indexL[i]-mid):
                            diff = abs(sort_indexL[i]-mid)
                            maxP = sort_indexL[i]
                            maxV = valueL[maxP]
                            #print 'Iterated maxP:', maxP
                    else:
                        break
                #----------------------------
                if other:
                    print('%s\t%s\t%s\t%s\t%s\t%s' % (chr, str(maxP+start),
                            str(maxP+start+1), name, str(maxV), other))
                    other = ''
                else:
                    print('%s\t%s\t%s\t%s\t%s' % (chr, str(maxP+start),
                            str(maxP+start+1), name, str(maxV)))
            #---------------get the position with maximum value-------
            else:
                print "%s\t%s" % (name, \
                    '\t'.join([str(op(valueL)) for op in opL]))
            valueL = np_array([])
        #----------output ----------------------------------
        #---------------begin process--------------------
        chr = lineL[0]
        start = int(lineL[1])
        end   = int(lineL[2])
        length = start - end - 1  # negative; used as the stop of the reverse
                                  # range scan so the whole region is covered
        mid = (end-start)/2.0
        label = lineL[3]
        if len(lineL) >= 6:
            other = '\t'.join(lineL[5:])
        if strand:
            strand_in = lineL[5]
            strand_num = 0 if strand_in == '+' else 1
            if name_mode:
                name = ''.join([label, '@', strand_in])
            else:
                name = '\t'.join(lineL)
            valueL = np_append(valueL,
                [wigChrDict.get(i, [0, 0])[strand_num] for i in range(start, end)])
        else:
            if name_mode:
                name = label
            else:
                name = '\t'.join(lineL)
            valueL = np_append(valueL,
                [wigChrDict.get(i, 0) for i in range(start, end)])
    #---------------------------------------------------------------------
    if label:
        #---------------get the position with maximum value-------
        if mp:
            sort_indexL = argsort(valueL)
            #print valueL
            #print sort_indexL
            maxP = sort_indexL[-1]
            #print 'ORiginal maxP:', maxP
            diff = abs(sort_indexL[-1]-mid)
            maxV = valueL[maxP]
            #print 'Total length:', length
            for i in range(-2, length, -1):
                if valueL[sort_indexL[i]] == maxV:
                    if diff > abs(sort_indexL[i]-mid):
                        diff = abs(sort_indexL[i]-mid)
                        maxP = sort_indexL[i]
                        #print maxP
                else:
                    break
            #----------------------------
            if other:
                print('%s\t%s\t%s\t%s\t%s\t%s' % (chr, str(maxP+start),
                        str(maxP+start+1), name, str(maxV), other))
            else:
                print('%s\t%s\t%s\t%s\t%s' % (chr, str(maxP+start),
                        str(maxP+start+1), name, str(maxV)))
            #print "%s\t%s" % (name, str(maxP+start))
        #---------------get the position with maximum value-------
        else:
            print "%s\t%s" % (name, \
                '\t'.join([str(op(valueL)) for op in opL]))
        valueL = np_array([])
Example #31
0
def get_meshsolution(self, output):
    """Build the MeshSolution objects from the FEA outputs.

    Parameters
    ----------
    self : MagElmer
        a MagElmer object
    output: Output
        An Output object

    Returns
    -------
    meshsol: MeshSolution
        a MeshSolution object with Elmer outputs at every time step
    """
    project_name = self.get_path_save_fea(output)
    elmermesh_folder = project_name
    meshsol = MeshSolution(label="Elmer MagnetoDynamics")
    if not self.is_get_mesh or not self.is_save_FEA:
        self.get_logger().info(
            "MagElmer: MeshSolution is not stored by request.")
        return False

    meshvtk = MeshVTK(path=elmermesh_folder, name="step_t0002", format="vtu")
    meshsol.mesh = [meshvtk]

    result_filename = join(elmermesh_folder, "step_t0002.vtu")
    meshsolvtu = read(result_filename)
    #pt_data = meshsolvtu.point_data
    cell_data = meshsolvtu.cell_data

    #indices = arange(meshsolvtu.points.shape[0])
    indices = arange(meshsolvtu.cells[0].data.shape[0] +
                     meshsolvtu.cells[1].data.shape[0])

    Indices = Data1D(name="indice", values=indices, is_components=True)
    # store_dict = {
    #     "magnetic vector potential": {
    #         "name": "Magnetic Vector Potential A",
    #         "unit": "Wb",
    #         "symbol": "A",
    #         "norm": 1,
    #     },
    #     "magnetic flux density": {
    #         "name": "Magnetic Flux Density B",
    #         "unit": "T",
    #         "symbol": "B",
    #         "norm": 1,
    #     },
    #     "magnetic field strength": {
    #         "name": "Magnetic Field H",
    #         "unit": "A/m",
    #         "symbol": "H",
    #         "norm": 1,
    #     },
    #     "current density": {
    #         "name": "Current Density J",
    #         "unit": "A/mm2",
    #         "symbol": "J",
    #         "norm": 1,
    #     }
    # }
    store_dict = {
        "magnetic flux density e": {
            "name": "Magnetic Flux Density B",
            "unit": "T",
            "symbol": "B",
            "norm": 1,
        },
        "magnetic vector potential e": {
            "name": "Magnetic Vector Potential A",
            "unit": "Wb",
            "symbol": "A",
            "norm": 1,
        },
        "magnetic field strength e": {
            "name": "Magnetic Field H",
            "unit": "A/m",
            "symbol": "H",
            "norm": 1,
        },
        "current density e": {
            "name": "Current Density J",
            "unit": "A/mm2",
            "symbol": "J",
            "norm": 1,
        },
    }
    comp_ext = ["x", "y", "z"]
    sol_list = []
    #for key, value in pt_data.items():
    for key, value in cell_data.items():
        if key in store_dict:
            #siz = value.shape[1]
            siz = value[0].shape[1]
            if siz > 3:
                print("Some Message")
                siz = 3
            components = []
            comp_name = []
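            # value holds one array per cell block (presumably the two element
            # groups sliced into `indices` above); stack them so the rows line
            # up with the combined indice axis.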
            values = np_append(value[0], value[1], axis=0)
            for i in range(siz):
                if siz == 1:
                    ext = ""
                else:
                    ext = comp_ext[i]

                data = DataTime(
                    name=store_dict[key]["name"] + ext,
                    unit=store_dict[key]["unit"],
                    symbol=store_dict[key]["symbol"] + ext,
                    axes=[Indices],
                    #values=value[:, i],
                    values=values[:, i],
                    normalizations={"ref": store_dict[key]["norm"]},
                )

                components.append(data)
                comp_name.append("comp_" + ext)

            if siz == 1:
                field = components[0]
                sol_list.append(
                    SolutionData(
                        field=field,
                        #type_cell="point",
                        type_cell="triangle",
                        label=store_dict[key]["symbol"],
                    ))
            else:
                comps = {}
                for i in range(siz):
                    comps[comp_name[i]] = components[i]
                field = VectorField(name=store_dict[key]["name"],
                                    symbol=store_dict[key]["symbol"],
                                    components=comps)
                sol_list.append(
                    SolutionVector(
                        field=field,
                        #type_cell="point",
                        type_cell="triangle",
                        label=store_dict[key]["symbol"],
                    ))

    meshsol.solution = sol_list
    output.mag.meshsolution = meshsol

    return True
Example #32
0
def main():
    options, args = cmdparameter(sys.argv)
    #-----------------------------------
    bed = options.bed
    wig = options.wig
    strand = options.strand
    verbose = options.verbose
    debug = options.debug
    wigDict = readWig(wig, strand)
    opL = options.op.split(',')
    name_mode = int(options.name)
    #-----------------------------------
    opDict = {'mean': mean, 'median': median,
              'max': max, 'min': min, 'sum': sum}
    if bed == '-':
        fh = sys.stdin
    else:
        fh = open(bed)
    #--------------------------------
    if name_mode:
        print "#name\t%s" % '\t'.join(opL)
    else:
        print "#%s" % '\t'.join(opL)
    label = ''
    valueL = np_array([])
    #print wigDict
    for line in fh:
        lineL = line.strip().split('\t')
        chr = lineL[0]
        start = int(lineL[1])
        end = int(lineL[2])
        innerD = wigDict[chr]
        if label and label != lineL[3]:
            print "%s\t%s" % (name, \
                '\t'.join([str(opDict[op](valueL)) for op in opL]))
            valueL = np_array([])
        #---------------------------------------------------------
        label = lineL[3]
        if strand:
            strand_in = lineL[5]
            if name_mode:
                name = label + '@' + strand_in
            else:
                name = line.strip()
            strand_num = 0 if strand_in == '+' else 1
            valueL = np_append(valueL, [innerD.get(i, [0, 0])[strand_num]
                for i in range(start, end)])
        else:
            if name_mode:
                name = lineL[3]
            else:
                name = line.strip()
            valueL = np_append(valueL, [innerD.get(i, 0)
                for i in range(start, end)])
        #----------------------------------------------
        #print valueL
        #for op in opL:
        #    tmpL.append(str(opDict[op](valueL)))
    #-------------END reading file----------
    #------for the last name-----------------
    if label:
        print "%s\t%s" % (name, \
            '\t'.join([str(opDict[op](valueL)) for op in opL]))
    #----close file handle for files-----
    if bed != '-':
        fh.close()
    #-----------end close fh-----------
    if verbose:
        print("--Successful %s" % strftime(timeformat, localtime()),
              file=sys.stderr)
Example #33
0
def load(satscene,
         calibrate=True,
         area_extent=None,
         read_basic_or_detailed='both',
         **kwargs):
    """Load MSG SEVIRI High Resolution Wind (HRW) data from hdf5 format.
    """

    # Read config file content
    conf = ConfigParser()
    conf.read(os.path.join(CONFIG_PATH, satscene.fullname + ".cfg"))
    values = {
        "orbit": satscene.orbit,
        "satname": satscene.satname,
        "number": satscene.number,
        "instrument": satscene.instrument_name,
        "satellite": satscene.fullname
    }

    LOG.info("assume seviri-level5")
    print "... assume seviri-level5"

    satscene.add_to_history("hdf5 data read by mpop/nwcsaf_hrw_hdf.py")

    # end of scan time 4 min after start
    end_time = satscene.time_slot + datetime.timedelta(minutes=4)

    # area !!! satscene.area

    filename = os.path.join(
        satscene.time_slot.strftime(conf.get("seviri-level5", "dir",
                                             raw=True)),
        satscene.time_slot.strftime(
            conf.get("seviri-level5", "filename", raw=True)) % values)

    # define classes before we search for files (in order to return empty class if no file is found)
    HRW_basic = HRW_class()
    HRW_basic.detailed = False
    HRW_basic.date = satscene.time_slot
    HRW_detailed = HRW_class()
    HRW_detailed.detailed = True
    HRW_detailed.date = satscene.time_slot

    print "... search for file: ", filename
    filenames = glob(str(filename))

    if len(filenames) != 0:

        if len(filenames) > 1:
            print "*** Warning, more than 1 datafile found: ", filenames

        filename = filenames[0]
        print("... read data from %s" % str(filename))

        # create an instant of the HRW_class
        m_per_s_to_knots = 1.944

        ## limit channels to read
        #hrw_channels=['HRV']
        # limit basic or detailed or both
        #read_basic_or_detailed='detailed'
        #read_basic_or_detailed='basic'

        with h5py.File(filename, 'r') as hf:

            #print hf.attrs.keys()
            #print hf.attrs.values()

            region_name = hf.attrs['REGION_NAME'].replace("_", "")
            print "... read HRW data for region ", region_name
            LOG.info("... read HRW data for region " + region_name)
            sat_ID = GP_IDs[int(hf.attrs["GP_SC_ID"])]
            print "... derived from Meteosat ", sat_ID
            LOG.info("... derived from Meteosat " + sat_ID)

            # print('List of arrays in this file: \n', hf.keys()), len(hf.keys())

            if len(hf.keys()) == 0:
                print "*** Warning, empty file ", filename
                print ""
            else:
                for key in hf.keys():

                    if key[4:9] == "BASIC":
                        if 'read_basic_or_detailed' in locals():
                            if read_basic_or_detailed.lower() == "detailed":
                                continue
                        HRW_data = HRW_basic  # shallow copy
                    elif key[4:12] == "DETAILED":
                        if 'read_basic_or_detailed' in locals():
                            if read_basic_or_detailed.lower() == "basic":
                                continue
                        HRW_data = HRW_detailed  # shallow copy

                    hrw_chn = dict_channel[key[len(key) - 9:]]

                    if 'hrw_channels' in locals():
                        if hrw_channels is not None:
                            if hrw_chn not in hrw_channels:
                                print "... " + hrw_chn + " is not in hrw_channels", hrw_channels
                                print "    skip reading this channel"
                                continue

                    # read all  data
                    channel = hf.get(key)
                    # print '... read wind vectors of channel ', channel.name, hrw_chn
                    # print  "  i    lon        lat      speed[kn] dir   pressure"
                    #for i in range(channel.len()):
                    #    print '%3d %10.7f %10.7f %7.2f %7.1f %8.1f' % (channel[i]['wind_id'], channel[i]['lon'], channel[i]['lat'], \
                    #                                                   channel[i]['wind_speed']*m_per_s_to_knots, \
                    #                                                   channel[i]['wind_direction'], channel[i]['pressure'])
                    # create string array with channel names
                    channel_chararray = np_empty(channel.len(), dtype='|S6')
                    channel_chararray[:] = hrw_chn

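                    # Accumulation pattern: every field of HRW_data is kept as
                    # a flat numpy array, so each channel's records are simply
                    # concatenated onto whatever has been collected so far.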
                    HRW_data.channel = np_append(HRW_data.channel,
                                                 channel_chararray)
                    HRW_data.wind_id = np_append(HRW_data.wind_id,
                                                 channel[:]['wind_id'])
                    HRW_data.prev_wind_id = np_append(
                        HRW_data.prev_wind_id, channel[:]['prev_wind_id'])
                    HRW_data.segment_X = np_append(HRW_data.segment_X,
                                                   channel[:]['segment_X'])
                    HRW_data.segment_Y = np_append(HRW_data.segment_Y,
                                                   channel[:]['segment_Y'])
                    HRW_data.t_corr_method = np_append(
                        HRW_data.t_corr_method, channel[:]['t_corr_method'])
                    HRW_data.lon = np_append(HRW_data.lon, channel[:]['lon'])
                    HRW_data.lat = np_append(HRW_data.lat, channel[:]['lat'])
                    HRW_data.dlon = np_append(HRW_data.dlon,
                                              channel[:]['dlon'])
                    HRW_data.dlat = np_append(HRW_data.dlat,
                                              channel[:]['dlat'])
                    HRW_data.pressure = np_append(HRW_data.pressure,
                                                  channel[:]['pressure'])
                    HRW_data.wind_speed = np_append(HRW_data.wind_speed,
                                                    channel[:]['wind_speed'])
                    HRW_data.wind_direction = np_append(
                        HRW_data.wind_direction, channel[:]['wind_direction'])
                    HRW_data.temperature = np_append(HRW_data.temperature,
                                                     channel[:]['temperature'])
                    HRW_data.conf_nwp = np_append(HRW_data.conf_nwp,
                                                  channel[:]['conf_nwp'])
                    HRW_data.conf_no_nwp = np_append(HRW_data.conf_no_nwp,
                                                     channel[:]['conf_no_nwp'])
                    HRW_data.t_type = np_append(HRW_data.t_type,
                                                channel[:]['t_type'])
                    HRW_data.t_level_method = np_append(
                        HRW_data.t_level_method, channel[:]['t_level_method'])
                    HRW_data.t_winds = np_append(HRW_data.t_winds,
                                                 channel[:]['t_winds'])
                    HRW_data.t_corr_test = np_append(HRW_data.t_corr_test,
                                                     channel[:]['t_corr_test'])
                    HRW_data.applied_QI = np_append(HRW_data.applied_QI,
                                                    channel[:]['applied_QI'])
                    HRW_data.NWP_wind_levels = np_append(
                        HRW_data.NWP_wind_levels,
                        channel[:]['NWP_wind_levels'])
                    HRW_data.num_prev_winds = np_append(
                        HRW_data.num_prev_winds, channel[:]['num_prev_winds'])
                    HRW_data.orographic_index = np_append(
                        HRW_data.orographic_index,
                        channel[:]['orographic_index'])
                    HRW_data.cloud_type = np_append(HRW_data.cloud_type,
                                                    channel[:]['cloud_type'])
                    HRW_data.wind_channel = np_append(
                        HRW_data.wind_channel, channel[:]['wind_channel'])
                    HRW_data.correlation = np_append(HRW_data.correlation,
                                                     channel[:]['correlation'])
                    HRW_data.pressure_error = np_append(
                        HRW_data.pressure_error, channel[:]['pressure_error'])

                # sort according to wind_id
                inds = HRW_data.wind_id.argsort()
                HRW_data.subset(inds)  # changes HRW_data itself

                # sorting without conversion to numpy arrays
                #[e for (wid,pwid) in sorted(zip(HRW_data.wind_id,HRW_data.prev_wind_id))]

    else:
        print "*** Error, no file found"
        print ""
        sat_ID = "no file"
        # but we continue the program in order to add an empty channel below

    ## filter data according to the given optional arguments
    #n1 = str(HRW_data.channel.size)
    #HRW_data = HRW_data.filter(**kwargs)
    #print "    apply filters "+' ('+n1+'->'+str(HRW_data.channel.size)+')'

    chn_name = "HRW"
    satscene[chn_name].HRW_basic = HRW_basic.filter(
        **kwargs)  # returns new object (deepcopied and filtered)
    satscene[chn_name].HRW_detailed = HRW_detailed.filter(
        **kwargs)  # returns new object (deepcopied and filtered)
    satscene[chn_name].info['units'] = 'm/s'
    satscene[chn_name].info['satname'] = 'meteosat'
    satscene[chn_name].info['satnumber'] = sat_ID
    satscene[chn_name].info['instrument_name'] = 'seviri'
    satscene[chn_name].info['time'] = satscene.time_slot
    satscene[chn_name].info['is_calibrated'] = True
Example #34
0
def main():
    options, args = cmdparameter(sys.argv)
    #-----------------------------------
    bed = options.bed
    wig = options.wig
    strand = options.strand
    verbose = options.verbose
    debug = options.debug
    wigDict = readWig(wig, strand)
    opL = options.op.split(',')
    name_mode = int(options.name)
    #-----------------------------------
    opDict = {'mean':mean, 'median':median, \
            'max':max, 'min':min, 'sum':sum}
    if file == '-':
        fh = sys.stdin
    else:
        fh = open(bed)
    #--------------------------------
    if name_mode:
        print "#name\t%s" % '\t'.join(opL)
    else:
        print "#%s" % '\t'.join(opL)
    label = ''
    valueL = np_array([])
    #print wigDict
    for line in fh:
        lineL = line.strip().split('\t')
        chr   = lineL[0]
        start = int(lineL[1])
        end   = int(lineL[2])
        innerD = wigDict[chr]
        if label and label != lineL[3]:
            print "%s\t%s" % (name, \
                '\t'.join([str(opDict[op](valueL)) for op in opL]))
            valueL = np_array([])
        #---------------------------------------------------------
        label = lineL[3]
        if strand:
            strand_in = lineL[5]
            if name_mode:
                name = label+'@'+ strand_in
            else:
                name = line.strip()
            strand_num = 0 if strand_in=='+' else 1
            valueL = np_append(valueL, [innerD.get(i,[0,0])[strand_num] \
                for i in xrange(start,end)])
        else:
            if name_mode:
                name = lineL[3]
            else:
                name = line.strip()
            valueL = np_append(valueL, [innerD.get(i,0) \
                for i in xrange(start,end)])
        #----------------------------------------------
        #print valueL
        #for op in opL:
        #    tmpL.append(str(opDict[op](valueL)))
    #-------------END reading file----------
    #------for the last name-----------------
    if label:
        print "%s\t%s" % (name, \
            '\t'.join([str(opDict[op](valueL)) for op in opL]))
    #----close file handle for files-----
    if bed != '-':
        fh.close()
    #-----------end close fh-----------
    if verbose:
        print >>sys.stderr,\
            "--Successful %s" % strftime(timeformat, localtime())
Example #35
0
        self.assertEqual(z_obj._removed, removed)

    def test_remove_by_indices(self):
        """Test removing a bunch of indices"""
        z_obj = type(self).z_obj
        other_z_obj = deepcopy(z_obj)
        indices = [2, 23]
        z_obj._remove_by_indices(indices)
        for index in indices:
            other_z_obj._remove_by_index(index)
        # Just checking lengths is probably fine
        self.assertEqual(z_obj._lengths, other_z_obj._lengths)

    def test_calculate_zscores(self):
        """Test calculating zscores"""
        pass

    def test_remove_by_zscores(self):
        """Test removing by zscore"""
        pass


if __name__ == '__main__':
    lengths = _mock_data()
    m, s = mean(lengths), std(lengths)
    # Original data set does not contain any outliers by Z-score
    # Add some extreme values
    zlengths = np_append(lengths, [300, 400, 600, 700])
    outlier_vals = [l for l in zlengths if abs(l - m) / s >= 3]
    print(outlier_vals)
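The driver above appends a few extreme values onto the mock lengths and then flags anything at least three standard deviations from the original mean. A self-contained sketch of the same check, with stand-in data replacing _mock_data():

from numpy import append as np_append, array as np_array, mean, std

lengths = np_array([100.0, 102.0, 98.0, 101.0, 99.0])  # stand-in for _mock_data()
m, s = mean(lengths), std(lengths)                     # stats from the clean data only
zlengths = np_append(lengths, [300.0, 400.0])
print([l for l in zlengths if abs(l - m) / s >= 3])    # -> [300.0, 400.0]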
Example #36
0
    def __add__(self, HRW_class2):

        HRW_new = HRW_class()

        HRW_new.date            = self.date      # !!! does not make sense !!! 
        HRW_new.detailed        = self.detailed  # !!! does not make sense !!!
        HRW_new.channel         = np_append(self.channel,         HRW_class2.channel)
        HRW_new.wind_id         = np_append(self.wind_id,         HRW_class2.wind_id)
        HRW_new.prev_wind_id    = np_append(self.prev_wind_id,    HRW_class2.prev_wind_id)
        HRW_new.segment_X       = np_append(self.segment_X,       HRW_class2.segment_X)
        HRW_new.segment_Y       = np_append(self.segment_Y,       HRW_class2.segment_Y)
        HRW_new.t_corr_method   = np_append(self.t_corr_method,   HRW_class2.t_corr_method)
        HRW_new.lon             = np_append(self.lon,             HRW_class2.lon)
        HRW_new.lat             = np_append(self.lat,             HRW_class2.lat)
        HRW_new.dlon            = np_append(self.dlon,            HRW_class2.dlon)
        HRW_new.dlat            = np_append(self.dlat,            HRW_class2.dlat)
        HRW_new.pressure        = np_append(self.pressure,        HRW_class2.pressure)
        HRW_new.wind_speed      = np_append(self.wind_speed,      HRW_class2.wind_speed)
        HRW_new.wind_direction  = np_append(self.wind_direction,  HRW_class2.wind_direction)       
        HRW_new.temperature     = np_append(self.temperature,     HRW_class2.temperature)
        HRW_new.conf_nwp        = np_append(self.conf_nwp,        HRW_class2.conf_nwp)
        HRW_new.conf_no_nwp     = np_append(self.conf_no_nwp,     HRW_class2.conf_no_nwp)
        HRW_new.t_type          = np_append(self.t_type,          HRW_class2.t_type)
        HRW_new.t_level_method  = np_append(self.t_level_method,  HRW_class2.t_level_method)
        HRW_new.t_winds         = np_append(self.t_winds,         HRW_class2.t_winds)
        HRW_new.t_corr_test     = np_append(self.t_corr_test,     HRW_class2.t_corr_test)
        HRW_new.applied_QI      = np_append(self.applied_QI,      HRW_class2.applied_QI)
        HRW_new.NWP_wind_levels = np_append(self.NWP_wind_levels, HRW_class2.NWP_wind_levels)
        HRW_new.num_prev_winds  = np_append(self.num_prev_winds,  HRW_class2.num_prev_winds)
        HRW_new.orographic_index= np_append(self.orographic_index,HRW_class2.orographic_index)
        HRW_new.cloud_type      = np_append(self.cloud_type,      HRW_class2.cloud_type)
        HRW_new.wind_channel    = np_append(self.wind_channel,    HRW_class2.wind_channel)
        HRW_new.correlation     = np_append(self.correlation,     HRW_class2.correlation)
        HRW_new.pressure_error  = np_append(self.pressure_error,  HRW_class2.pressure_error)

        return HRW_new
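Every attribute in __add__ is concatenated the same way, which invites a table-driven version. A hedged sketch of the idea, with an illustrative subset of field names (the real class has many more):

from copy import copy
from numpy import append as np_append, array as np_array

HRW_FIELDS = ['channel', 'wind_id', 'wind_speed']  # illustrative subset only

class _Winds(object):
    pass

def hrw_add(first, second):
    new = copy(first)  # scalar attributes such as date/detailed carry over unchanged
    for field in HRW_FIELDS:
        setattr(new, field, np_append(getattr(first, field),
                                      getattr(second, field)))
    return new

a, b = _Winds(), _Winds()
for obj, ids in ((a, [1, 2]), (b, [3, 4])):
    obj.channel = np_array(['IR108', 'IR108'])
    obj.wind_id = np_array(ids)
    obj.wind_speed = np_array([10.0, 11.0])
print(hrw_add(a, b).wind_id)  # -> [1 2 3 4]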
Example #37
0
    def transfer_learning(self, max_iterations, accepted_mean_square_error=0.1, batch_size=5000, learning_rate=1e-4):

        # Adam optimizer optimizes the parameters (weights and biases) at the learning rate specified
        output_network_optimizer = Adam(self.output_neural_net.parameters(), lr=learning_rate)

        # This is the row count difference between that of input and output grids
        row_difference = self.output_grid_dimension[0] - self.input_grid_dimension[0]

        # This is the column count difference between that of input and output grids
        column_difference = self.output_grid_dimension[1] - self.input_grid_dimension[1]

        # We cycle through iterations of each batch of training the output neural network until the max iteration
        for _ in range(0, max_iterations):

            # Training list - each element in the list contains [input state for output neural net, target value]
            training_list = []

            # Shuffling through batches and then calculating the Mean square error for the entire batch
            for batch in range(0, batch_size):

                # Creating a matrix to hold the observation state of the input neural network map (0 or 1)
                output_network_known_state = np_array([[randint(0, 1) for _ in range(0, self.output_grid_dimension[1])]
                                                      for _ in range(0, self.output_grid_dimension[0])])

                # This is used to store the robot and target state of the input neural network
                input_network_state = np_empty((self.network_robot_count+1)*2, dtype=np_uint8)

                # Creating positions for all of the robots and target of the input neural network randomly
                for i in range(0, (self.network_robot_count+1)*2, 2):
                    input_network_state[i] = randint(0, self.input_grid_dimension[0]-1)
                    input_network_state[i+1] = randint(0, self.input_grid_dimension[1]-1)

                # Keep a reference to the bare position vector; np_append below returns
                # a new array, so this untouched version can be restored afterwards
                input_network_state_memory = input_network_state

                # Sliding the input network state window over different sections of the output network state
                for i in range(0, row_difference):
                    for j in range(0, column_difference):

                        # Creating a matrix to hold the observation state of the input neural network map (0 or 1)
                        input_network_known_state = output_network_known_state[i:(i+self.input_grid_dimension[0]),
                                                                               j:(j+self.input_grid_dimension[1])]

                        # This is used to store the robot and target state of the output neural network
                        output_network_state = np_empty((self.network_robot_count+1)*2, dtype=np_uint8)

                        # Extending the input position states across the moving window within the output grid dimensions
                        for k in range(0, (self.network_robot_count+1)*2, 2):
                            output_network_state[k] = input_network_state[k]+i
                            output_network_state[k+1] = input_network_state[k+1]+j

                        # Now we flatten data in the input network state, a 2-D matrix to 1-D and append
                        input_network_state = np_append(input_network_state,
                                                        np_ndarray.flatten(input_network_known_state))

                        # Now we flatten data in the output network state, a 2-D matrix to 1-D and append
                        output_network_state = np_append(output_network_state,
                                                         np_ndarray.flatten(output_network_known_state))

                        # Looping through the 4 possible actions each robot can take and then appending them to state
                        for k in range(0, 4):
                            # Adding an action completes the input state for the input neural network
                            input_network_state_tensor = Tensor(np_append(input_network_state, k))
                            # Adding an action completes the input state for the output neural network
                            output_network_state_tensor = Tensor(np_append(output_network_state, k))
                            # Getting the Q value predicted by the input neural network for the given state
                            input_network_predicted_value = self.input_neural_net.forward(input_network_state_tensor)
                            # Now we know the value the output neural network is to be trained towards for its given
                            # input. Add both of them to the training list so that batch training can occur later
                            training_list.append([output_network_state_tensor, input_network_predicted_value])

                        # Restoring the input state from memory
                        input_network_state = input_network_state_memory

            # Shuffling the training data before feeding it in for training
            shuffle(training_list)
            # Initializing the current MSE loss
            sum_square_error = 0.0
            # Using the batch of state and target data for training the output neural network
            for batch in range(0, batch_size):
                # Obtaining the completed input states for the output neural network
                output_network_state_tensor = training_list[batch][0]
                # Obtaining the target predictions that the output neural network should be trained towards
                predicted_target_value = training_list[batch][1]
                # Getting the Q value predicted by the output neural network for the given input state
                output_network_predicted_value = self.output_neural_net.forward(output_network_state_tensor)
                # Adding the current square error to the sum of square errors
                sum_square_error += pow((output_network_predicted_value - predicted_target_value), 2)
                # Represents the function that can calculate training error
                training_error_function = MSELoss()
                # Our goal is to reduce the mean square error loss between the target prediction and that of network
                training_error = training_error_function(output_network_predicted_value, predicted_target_value)
                # Clears the gradients of all optimized torch tensors
                output_network_optimizer.zero_grad()
                # During the backwards pass, gradients from each replica are summed into the original module
                training_error.backward()
                # Training actually happens here. Performs a single optimization step of weights and biases
                output_network_optimizer.step()

            # Dividing the sum of square errors by the batch size to get the mean square error
            current_mean_square_error = sum_square_error/batch_size

            print(current_mean_square_error)

            # Checks if the MSE for the entire batch is within acceptable levels and then returns the output neural net
            if current_mean_square_error <= accepted_mean_square_error:
                # we return a list where true indicates that we achieved the accepted mean square error criteria
                return [self.output_neural_net, True]

        # Failed to completely train the output neural network. Return a list with second element false to indicate this
        return [self.output_neural_net, False]
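Each training input above is assembled by chaining np_append calls: the 1-D position vector, the flattened 2-D occupancy grid, and finally an action index. A minimal sketch of that state-assembly step with hypothetical dimensions:

from numpy import append as np_append, array as np_array, ndarray as np_ndarray

known_state = np_array([[0, 1, 0], [1, 0, 0], [0, 0, 1]])  # hypothetical 3x3 grid
positions = np_array([0, 2, 1, 1])  # robot at (0, 2), target at (1, 1)

state = np_append(positions, np_ndarray.flatten(known_state))
state_with_action = np_append(state, 3)  # action index completes the input
print(state_with_action.shape)  # (4 + 9 + 1,) -> (14,)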
Example #38
0
def outputCoverage(wigChrDict, bedLineL, opL, name_mode, strand, mp):
    label = ''
    valueL = np_array([])
    other = ''
    for lineL in bedLineL:
        if label and label != lineL[3]:
            #---------------get the position with maximum value-------
            if mp:
                sort_indexL = argsort(valueL)
                #print valueL
                #print sort_indexL
                maxP = sort_indexL[-1]
                #print 'ORiginal maxP:', maxP
                diff = abs(sort_indexL[-1] - mid)
                maxV = valueL[maxP]
                #print 'Total length:', length
                for i in xrange(-2, length, -1):
                    if valueL[sort_indexL[i]] == maxV:
                        if diff > abs(sort_indexL[i] - mid):
                            diff = abs(sort_indexL[i] - mid)
                            maxP = sort_indexL[i]
                            maxV = valueL[maxP]
                            #print 'Iterated maxP:', maxP
                    else:
                        break
                #----------------------------
                if other:
                    print '%s\t%s\t%s\t%s\t%s\t%s' % (
                        chr, str(maxP + start), str(maxP + start + 1), name,
                        str(maxV), '\t'.join(other))
                    other = ''
                else:
                    print '%s\t%s\t%s\t%s\t%s' % (chr, str(maxP + start),
                                                  str(maxP + start + 1), name,
                                                  str(maxV))
            #---------------get the position with maximum value-------
            else:
                print "%s\t%s" % (name, \
                    '\t'.join([str(op(valueL)) for op in opL]))
            valueL = np_array([])
        #----------output ----------------------------------
        #---------------begin process--------------------
        chr = lineL[0]
        start = int(lineL[1])
        end = int(lineL[2])
        length = start - end - 1  # negative stop for the reverse xrange below; -1 makes the scan cover start..end
        mid = (end - start) / 2.0
        label = lineL[3]
        if len(lineL) >= 6:
            other = '\t'.join(lineL[5:])
        if strand:
            strand_in = lineL[5]
            strand_num = 0 if strand_in == '+' else 1
            if name_mode:
                name = label + '@' + strand_in
            else:
                name = '\t'.join(lineL)
            valueL = np_append(valueL, \
                [wigChrDict.get(i,[0,0])[strand_num] for i in xrange(start, end)])
        else:
            if name_mode:
                name = label
            else:
                name = '\t'.join(lineL)
            valueL = np_append(valueL, \
                [wigChrDict.get(i,0) for i in xrange(start, end)])
    #---------------------------------------------------------------------
    if label:
        #---------------get the position with maximum value-------
        if mp:
            sort_indexL = argsort(valueL)
            #print valueL
            #print sort_indexL
            maxP = sort_indexL[-1]
            #print 'ORiginal maxP:', maxP
            diff = abs(sort_indexL[-1] - mid)
            maxV = valueL[maxP]
            #print 'Total length:', length
            for i in xrange(-2, length, -1):
                if valueL[sort_indexL[i]] == maxV:
                    if diff > abs(sort_indexL[i] - mid):
                        diff = abs(sort_indexL[i] - mid)
                        maxP = sort_indexL[i]
                        #print maxP
                else:
                    break
            #----------------------------
            if other:
                print '%s\t%s\t%s\t%s\t%s\t%s' % (chr, str(maxP + start),
                                                  str(maxP + start + 1), name,
                                                  str(maxV), '\t'.join(other))
            else:
                print '%s\t%s\t%s\t%s\t%s' % (chr, str(maxP + start),
                                              str(maxP + start + 1), name,
                                              str(maxV))
            #print "%s\t%s" % (name, str(maxP+start))
        #---------------get the position with maximum value-------
        else:
            print "%s\t%s" % (name, \
                '\t'.join([str(op(valueL)) for op in opL]))
        valueL = np_array([])
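When mp is set, outputCoverage does not simply take the argmax: among positions tied for the maximum value it prefers the one nearest the interval midpoint. A compact sketch of that tie-breaking rule (the midpoint convention is simplified here relative to the original):

from numpy import argsort, array as np_array

def max_pos_near_mid(valueL):
    mid = (len(valueL) - 1) / 2.0
    order = argsort(valueL)
    maxV = valueL[order[-1]]
    # keep every index whose value ties the maximum, then pick the most central
    tied = [i for i in order[::-1] if valueL[i] == maxV]
    return min(tied, key=lambda i: abs(i - mid))

print(max_pos_near_mid(np_array([5, 2, 5, 2, 5])))  # indices 0, 2, 4 tie; 2 is central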