Example #1
from keras.layers.core import Activation
from keras.layers import Embedding
from keras.layers import Input, Flatten
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras.callbacks import EarlyStopping
from keras.initializers import glorot_normal, glorot_uniform
from gensim.models.keyedvectors import KeyedVectors
from keras import optimizers
from keras.utils import to_categorical
from keras import regularizers

# Read the data
input_size = 20
print("Reading training data...")
data_train, label_train = readData(
    'tass_2018_task_4_subtask2_train_dev/SANSE_train-2.tsv', input_size)

print(data_train.shape)
print(label_train.shape)

print("Leyendo datos de desarrollo...")
data_dev, label_dev = readData(
    'tass_2018_task_4_subtask2_train_dev/SANSE_dev-2.tsv', input_size)

print(data_dev.shape)
print(label_dev.shape)

print("Leyendo datos de test...")
data_test_1, id_test_1 = readDataTest('/Users/nuria/SEPLN/test-s2.tsv',
                                      input_size)
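readData itself is not shown in this example; a minimal sketch of what it might look like here, assuming a tab-separated file with a text column followed by a label column (the column order, tokenization, and vocabulary handling are all assumptions):

import numpy as np
import pandas as pd

def readData(path, input_size):
    # Hypothetical sketch: read a TSV, map each token to a vocabulary index,
    # and pad/truncate every sentence to input_size tokens.
    df = pd.read_csv(path, sep='\t')
    texts = df.iloc[:, 0].astype(str).tolist()  # text column (assumed first)
    labels = df.iloc[:, 1].values               # label column (assumed second)
    vocab = {}
    data = np.zeros((len(texts), input_size), dtype='int32')
    for row, text in enumerate(texts):
        for col, token in enumerate(text.split()[:input_size]):
            data[row, col] = vocab.setdefault(token, len(vocab) + 1)
    return data, labels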
Example #2
def load(self):
    self.data = read_data.readData(self)
    self.report(self.data.filepath)
Example #3
def processData(filePath, file_1, train=True):

    df = readData(filePath, file_1)

    print("================ separator line =============")
    
    
    df = df.drop(['unnamed: 0'], axis=1)
    ## Join the other data sources first
    df.drop([
             '未结清贷款account_count',
             '未销户贷记卡account_count',
             '未结清贷款credit_limit',
             '未销户贷记卡credit_limit',
             '未销户贷记卡max_credit_limit_per_org',
             '未销户贷记卡min_credit_limit_per_org',
             '未结清贷款balance',
             '未销户贷记卡used_credit_limit',
             '未结清贷款latest_6m_used_avg_amount',
             '未销户贷记卡latest_6m_used_avg_amount',
             'changing_amount',
             'last_months'], axis=1, inplace=True)
    if train:
        df_temp = pd.read_csv('D:\\workspace python\\contest\\qihuoout.csv')
    else:
        df_temp = pd.read_csv('D:\\workspace python\\contest\\out_test.csv')
    df = pd.merge(df, df_temp, left_on='report_id',
                  right_on='report_id', how='left')
    del df_temp
    
    #df.sort_values(by='y',inplace = True)
    ## Rename variables and fix their formats
    df.drop('id_card', axis=1, inplace=True)

    df.drop('loan_date', axis=1, inplace=True)

    df.drop('agent', axis=1, inplace=True)

    ## Deal with this later
    df.work_province.isnull().sum()

    ## edu
    df.edu_level.value_counts()
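    # Collapse education levels into three buckets: bachelor-and-above,
    # junior college, and below-junior-college; unmapped values become
    # 'missing' below.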
    mapping = {'本科':'本科及以上',
               '硕士研究生':'本科及以上',
               '博士研究生':'本科及以上',
               '硕士及以上':'本科及以上',
               '高中':'专科以下',
               '初中':'专科以下',
               '专科及以下':'专科以下',
               '其他':'专科以下',
               '专科':'专科'}
    
    df.edu_level = df.edu_level.map(mapping)
    df.edu_level = df.edu_level.fillna('missing')
    
    del mapping
    ## Housing fund
    df.has_fund = df.has_fund.fillna('missing')
    
    ## Marital status
    df.marry_status.value_counts()
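    # Normalize the labels: both "divorced" spellings map to one bucket and
    # widowed is folded into "other".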
    mapping = {'离婚':'离婚',
               '离异':'离婚',
               '丧偶':'其他',
               '已婚':'已婚',
               '未婚':'未婚',
               '其他':'其他'}
    df.marry_status = df.marry_status.map(mapping)
    
    del mapping
    
    ## Income
    df.salary.isnull().sum()
    
    
    df.salary = df.salary.fillna('missing')
    ## y
    if train:
        df.y = df.y.astype('category')
    
    '''
    pd.crosstab(df.y,df.salary).div(
            pd.crosstab(
                    df.y,df.salary).sum(1).astype(float),axis = 0).plot(
                    kind = 'bar')
    '''      
    ####------ next feature
    df['ln_settle_rate'] = df.贷款结清比例  # loan settlement ratio
    del df['贷款结清比例']
    
    df['ln_settle_rate'] = groupImmpute(df, 'ln_settle_rate')
    '''
    df.boxplot('ln_settle_rate',by = 'y')
    '''
    ## Whether the loan is abnormal
    df['ln_abnormal'] = df.贷款是否异常
    del df['贷款是否异常']

    df.ln_abnormal = df.ln_abnormal.fillna('missing')

    return df
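groupImmpute is not defined in this snippet (the spelling follows the call site); a minimal placeholder under the assumption that it fills missing values, here with a plain column median rather than any per-group logic:

import pandas as pd

def groupImmpute(df, col):
    # Hypothetical stand-in for the helper called above: fill missing values
    # in `col` with the column median (the real per-group imputation, if any,
    # is not shown in this example).
    return df[col].fillna(df[col].median())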
Example #4
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 16 17:44:11 2015

@author: zc
"""

from sklearn.externals import joblib
#
#clf = joblib.load('svmClassifier.pkl') 

import read_data
import numpy as np

a = read_data.readData(1, 'rbf')
print(a)

#data_set=[1,2,3,4]
#kernel=['sig','rbf','poly','linear']
#
#accu_matrix=np.ones([4,4])
#for i in range(0,4):
#    for j in range(0,4):
#        accu_matrix[i,j]=read_data.readData(data_set[i],kernel[j])
#        print(data_set[i], kernel[j], accu_matrix[i,j])
        
#      ([[ 0.8590604 ,  0.8590604 ,  0.17785235,  0.8590604 ],
#       [ 0.8283611 ,  0.8407594 ,  0.51452925,  0.82874855],
#       [ 0.72736626,  0.71707819,  0.16049383,  0.72530864],
#       [ 0.68787328,  0.69047001,  0.34484549,  0.687873]])
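The commented-out block above sketches a grid evaluation over data sets and kernels; a runnable version of the same loop, assuming read_data.readData(dataset, kernel) returns an accuracy score:

import numpy as np
import read_data

data_set = [1, 2, 3, 4]
kernel = ['sig', 'rbf', 'poly', 'linear']

accu_matrix = np.ones([4, 4])
for i in range(0, 4):
    for j in range(0, 4):
        # readData is assumed to train, evaluate, and return an accuracy
        accu_matrix[i, j] = read_data.readData(data_set[i], kernel[j])
        print(data_set[i], kernel[j], accu_matrix[i, j])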
Example #5
client = airsim.VehicleClient()
client.confirmConnection()

# Camera Information
camera = client.simGetCameraInfo("3")
test_camera = client.simGetCameraInfo("1")
if test_camera.pose != down:
    client.simSetCameraOrientation("1", down)
    client.simSetCameraOrientation("2", down)
FoV = 90

# Coordinate transformation
enu_2_ned = utils.qnorm(airsim.Quaternionr(0.7071068, 0.7071068, 0, 0), 10)
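# Note: with AirSim's Quaternionr(x, y, z, w) argument order, x = y = 1/sqrt(2),
# z = w = 0 is a 180-degree rotation about the (1, 1, 0) axis, which is the
# standard ENU <-> NED frame change. utils.qnorm is assumed to normalize the
# quaternion; the meaning of its second argument is not shown here.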

# panda read csv
position, attitude, accelerometer, gyroscope, noisy_accelerometer, noisy_gyroscope, time = traj.readData(
    '/home/tigerteam/gnss-ins-sim/demo_saved_data/openIMU_landing/')

# Get position and attitude data from panda read
ned_position_data = traj.getPosition(position)
ned_quaternion_data = traj.getAttitude(attitude)
time_new = traj.getTime(time)

# Gate for including simulated IMU data or perfect IMU readings
if TRUTH_IMU:
    print("Perfect IMU Data Selected")
    ned_accel = traj.getAccel(accelerometer)
    ned_gyro = traj.getGyro(gyroscope)
else:
    print("Noisy IMU Data Selected")
    ned_accel = traj.getNoisyAccel(noisy_accelerometer)
    ned_gyro = traj.getNoisyGyro(noisy_gyroscope)
Example #6
def importFile(self):

    #import UI execute
    #data_path = ui
    df = read_data.readData()
Example #7
def runMeta(book,
            sentences,
            wsent,
            char_list,
            job_labels,
            gender_label,
            job=False,
            gender=False,
            sentiment=False):
    """
    Compute various metadata about characters in char_list
    :param sentences: list(dict)
        List of dicts. Each dict is a sentence
        and contains 'nostop', 'words', 'tags'
    :param wsetn: dictionary of sentences by character
    :param char_list: list(unicode)
        List of character names in unicode
        Compound names are concatenated as in sentences
    :param job_labels: dict of character -> [job label]
    """
    ### GLOBAL PARAMS
    # classifier_data_dict has keys [u'tromper', u'nutrition', u'\xe9motions', u'dormir', u'raison', u'\xe9tats', u'vouloir', u'tuer', u'gu\xe9rir', u'relations', u'm\xe9tiers', u'salutations', u'soupir', u'pens\xe9e', u'parole', u'foi']
    classifier_data_dict = readData()
    sents_by_char = wsent
    word2vec_model = wordSimilarity.MyModel()
    char_list = list(reversed(char_list))  # by decreasing mention count
    save_path = 'metadata/' + book + '_'

    ################ JOBS #################
    # Define parameters
    job_list = classifier_data_dict[u'm\xe9tiers']
    N_CHARS = 10  # Num of chars to compute scores for -> default all
    predictors = ['count', 'proximity']

    if job:
        # Compute predictions
        df_job_full_const = jobPredictor(sentences,
                                         wsent,
                                         char_list,
                                         job_labels,
                                         job_list,
                                         word2vec_model,
                                         decreasing=False,
                                         full=True)
        df_job_full_decr = jobPredictor(sentences,
                                        wsent,
                                        char_list,
                                        job_labels,
                                        job_list,
                                        word2vec_model,
                                        decreasing=True,
                                        full=True)
        df_job_expo_const = jobPredictor(sentences,
                                         wsent,
                                         char_list,
                                         job_labels,
                                         job_list,
                                         word2vec_model,
                                         decreasing=False,
                                         full=False)
        df_job_expo_decr = jobPredictor(sentences,
                                        wsent,
                                        char_list,
                                        job_labels,
                                        job_list,
                                        word2vec_model,
                                        decreasing=True,
                                        full=False)

        # Save to csv
        df_job_full_const.to_csv(save_path + 'job_full_const.csv',
                                 encoding='utf-8')
        df_job_full_decr.to_csv(save_path + 'job_full_decr.csv',
                                encoding='utf-8')
        df_job_expo_decr.to_csv(save_path + 'job_expo_decr.csv',
                                encoding='utf-8')
        df_job_expo_const.to_csv(save_path + 'job_expo_const.csv',
                                 encoding='utf-8')

    ################## GENDER ###################

    # Load gender dict

    if gender:
        # Compute predictions
        gender_nosolo = genderPredictor(book,
                                        sentences,
                                        sents_by_char,
                                        char_list,
                                        gender_label,
                                        full=True,
                                        solo=False)
        gender_solo = genderPredictor(book,
                                      sentences,
                                      sents_by_char,
                                      char_list,
                                      gender_label,
                                      full=True,
                                      solo=True)
        gender_nosolo_w = genderPredictor(book,
                                          sentences,
                                          sents_by_char,
                                          char_list,
                                          gender_label,
                                          full=True,
                                          solo=False,
                                          weighted=True)
        gender_solo_w = genderPredictor(book,
                                        sentences,
                                        sents_by_char,
                                        char_list,
                                        gender_label,
                                        full=True,
                                        solo=True,
                                        weighted=True)

        # Save to csv
        gender_nosolo.to_csv(save_path + 'gender_nosolo.csv', encoding='utf-8')
        gender_solo.to_csv(save_path + 'gender_solo.csv', encoding='utf-8')
        gender_nosolo_w.to_csv(save_path + 'gender_nosolo_w.csv',
                               encoding='utf-8')
        gender_solo_w.to_csv(save_path + 'gender_solo_w.csv', encoding='utf-8')

    if sentiment:
        # # Compute predictions
        sentiment_nosolo = sentimentPredictor(sentences,
                                              sents_by_char,
                                              char_list,
                                              reduced=False)
        sentiment_nosolo.to_csv(save_path + 'sentiment_nosolo_top.csv',
                                encoding='utf-8')

        # sentiment_solo = sentimentPredictor(sentences, sents_by_char, char_list, reduced=False, solo=True)
        # sentiment_solo.to_csv(save_path + 'sentiment_solo_top.csv', encoding='utf-8')
        # sentimentPredictor(book, sentences, sents_by_char, char_list, reduced=False, write=True)

    # Print stats
    tokens = 0
    job_len = len([item for item in job_labels.values() if item])
    job_tok = len(
        [item for sublist in job_labels.values() for item in sublist])
    gender_len = len([item for item in gender_label.values() if item != '-'])
    for s in sentences:
        tokens += len(s['words'])
    print('{}, {}, {}, ({}, {}), {}'.format(book, tokens, len(char_list),
                                            job_len, job_tok, gender_len))
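A hypothetical invocation, assuming sentences, wsent, char_list, job_labels, and gender_label were produced by earlier preprocessing steps (the book name is illustrative):

runMeta('my_book', sentences, wsent, char_list,
        job_labels, gender_label,
        job=True, gender=True, sentiment=True)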
Example #8
        keepProb = np.zeros_like(out_layer)  # one-hot target matrix
        keepProb[np.arange(self.train_number), self.input_label] = 1.0
        for i in range(fb_numbers):
            self.output_weight += -np.dot(self.hidden_layer[-1].T, out_layer -
                                          keepProb) / self.train_number

            # pre_tidu=1
            # if self.layer_numbers>3:
            #     pre_tidu*=np.dot(keepProb - out_layer,self.output_weight.T)*self.sigmoidDaoShu(self.hidden_layer[-1])  # multiply w_ji by the accumulated upstream terms
            #     self.weights_hidden[-1] += -np.dot(self.hidden_layer[-2].T, pre_tidu)
            # # pre_sigema= #
            pre_tidu = keepProb - out_layer
            pre_weight = self.output_weight
            for j in range(0, self.layer_numbers - 3):
                # Gradient expression for the hidden layers, and from the
                # hidden layers toward the output layer
                pre_tidu = np.dot(pre_tidu, pre_weight.T) * self.sigmoidDaoShu(
                    self.hidden_layer[-j - 1])
                self.weights_hidden[-1 - j] += -np.dot(
                    self.hidden_layer[-2 - j].T, pre_tidu)
                pre_weight = self.weights_hidden[-1 - j]
                # pre_weight=self.weights_hidden[-1-j]
            # Hidden-to-output expression
            # pre_tidu = np.dot(pre_tidu, self.weights1.T) * self.sigmoidDaoShu(self.hidden_layer[1]),
            # self.weights1 += -np.dot(self.hidden_layer[0].T, pre_tidu)
        return


if __name__ == "__main__":
    train_obj = read_data.readData()
    train_data, train_label = read_data.read_picture_data(True)