Example #1
    def set_checkins(self, path, city, n, m, range):
        data_processor = data_process()
        data_processor.set_basic_info(path, city)
        self.pairs_path = data_processor.pairs_path
        self.path = data_processor.path
        self.city = data_processor.city
        self.range = range

        users = data_processor.checkins.uid.unique().tolist()
        users.sort(reverse=False)
        self.users = np.array(deepcopy(users))  # user ids sorted in ascending order
        del users
        gc.collect()
        self.M = m    # number of longitude grid cells
        self.N = n    # number of latitude grid cells
        grid_divider = grid_divide(data_processor.checkins.values.tolist(), self.N, self.M, self.range)
        self.checkins = grid_divider.divide_area_by_NN().iloc[:, [0, 1, 2, 3, 4, 5]]  # check-ins with grid id attached
        self.latInterval = grid_divider.get_latInterval()
        self.lngInterval = grid_divider.get_lngInterval()
        # self.lons_per_km = 0.005202 * 2  # delta longitude degrees per kilometre  0.005681
        # self.lons_per_km = 0.005681 * 2  # San Francisco
        # self.lats_per_km = 0.004492 * 2  # delta latitude degrees per kilometre
        self.lons_per_km = 0.0059352 * 2  # NY
        self.lats_per_km = 0.0044966 * 2  # delta latitude degrees per kilometre
        print("城市用户数为:", len(self.users))
Example #2
def route_sorting():

    try:
        toPOST = request.args.get('toPOST')  # user input points
    except Exception:
        print("GET failed")
        return jsonify({"message": "request body is not json."}), 400

    if toPOST:  # request.args.get returns a string or None, never a dict
        print("RADIX SORT START!")

        ### Process history data
        try:
            history = data_process(url)  # entire historical typhoon tracks
            point_data = history_point_data(history)  # P(i, j)
        except Exception:
            return jsonify({"message": "Defective JMA API."}), 406

        ### Process similarity model
        try:
            U = json.loads(toPOST)  # convert user inputs to dict
        except Exception:
            return jsonify({"message": "Wrong format in points data."}), 400

        ### Success
        return jsonify(radix_sort(history, point_data, U))
    else:
        print('NO USER INPUTS!')
        return jsonify({"message": "user input is empty."}), 400
Example #3
def open_file(file, to_file):
    if os.path.isdir(file):  # check whether the argument is a directory
        files = dp.read_path(file)
        for file in files:
            data = dp.create_data(file)
            data = dp.data_process(data, 2)
            dp.insert_csv(data, to_file)
    else:
        print("csv文件生成失败!")
        return
Example #4
def trainandshow():
    msg = '展示训练结果'  # protocol message sent to the server: "show training results"
    s.send(msg.encode('utf-8'))
    print("send len is : [%d]" % len(msg))
    messagebox.showinfo(title='Start training', message='Training started')
    train_para.withdraw()
    global train_show
    train_show = Tk()
    train_show.geometry('400x450')
    train_show.title('SQuAD2.0 QA System - Training Results')
    title = Label(train_show, text="Training result evaluation", font=("楷体", 20))
    title.place(x=130, y=50)
    data_use = data_process.data_process()

    model_para = server_gui.find_model()
    item = Label(train_show, text=("Model name: " + model_para.name), font=("楷体", 12), bg="LightGreen")
    item.place(x=100, y=100)
    item1 = Label(train_show, text=("Output path: " + model_para.output_dir), font=("楷体", 12), bg="LightGreen")
    item1.place(x=100, y=120)
    item2 = Label(train_show, text=("Dataset name: {num}".format(num=model_para.dataset_name)), font=("楷体", 12),
                  bg="LightGreen")
    item2.place(x=100, y=140)
    item3 = Label(train_show, text=("batch_size: {num}".format(num=model_para.batch_size)), font=("楷体", 12),
                  bg="LightGreen")
    item3.place(x=100, y=160)
    item3 = Label(train_show, text=("epoch: {num}".format(num=model_para.epoch)), font=("楷体", 12), bg="LightGreen")
    item3.place(x=100, y=180)
    item4 = Label(train_show, text=("lr: {num}".format(num=model_para.lr)), font=("楷体", 12), bg="LightGreen")
    item4.place(x=100, y=200)

    # experiment results
    result = data_use.read_result()
    res1 = Label(train_show, text=("EM: {num}".format(num=result["exact"])), font=("楷体", 12), bg="Wheat")
    res1.place(x=100, y=250)
    res2 = Label(train_show, text=("F1 score: {num}".format(num=result["f1"])), font=("楷体", 12), bg="Wheat")
    res2.place(x=100, y=270)
    res3 = Label(train_show, text=("HasAns_exact: {num}".format(num=result["HasAns_exact"])), font=("楷体", 12),
                 bg="Wheat")
    res3.place(x=100, y=290)
    res4 = Label(train_show, text=("HasAns_f1: {num}".format(num=result["HasAns_f1"])), font=("楷体", 12),
                 bg="Wheat")
    res4.place(x=100, y=310)
    res5 = Label(train_show, text=("NoAns_exact: {num}".format(num=result["NoAns_exact"])), font=("楷体", 12),
                 bg="Wheat")
    res5.place(x=100, y=330)
    res6 = Label(train_show, text=("NoAns_f1: {num}".format(num=result["NoAns_f1"])), font=("楷体", 12),
                 bg="Wheat")
    res6.place(x=100, y=350)

    exit_bt = Button(train_show, text='Exit', command=sys_exit)
    exit_bt.place(x=350, y=400)

    train_show.mainloop()
Example #5
def data_to_csv(file, to_file):
    if os.path.isdir(file):  # check whether the argument is a directory
        files = dp.read_path(file)
        for file in files:
            data = dp.create_data(file)
            for i in const.DATA_PART:
                to_files = to_file + i +'.csv'
                print(to_files)
                id = const.DATA_PART.index(i)
                datas = dp.data_process(data,id)
                dp.insert_csv(datas,to_files)
    else:
        print("创建csv文件失败!")
        return
Example #6
 def set_checkins(self, path, city):
     data_processor = data_process()
     data_processor.set_basic_info(path, city)
     data_processor.user_pairs()
     self.pairs_path = data_processor.pairs_path
     self.path = data_processor.path
     self.city = data_processor.city
     users = data_processor.checkins.uid.unique().tolist()
     users.sort(reverse=False)
     self.users = np.array(deepcopy(users))  # user ids sorted in ascending order
     del users
     gc.collect()
     self.checkins = data_processor.checkins
     print("城市用户数为:", len(self.users))
Example #7
def train(file):
    train_data, train_label, vocab_size, output_size = data_process(file)
    # length, depth = np.shape(train_data)
    batch_size = 6
    # print(np.shape(train_data))
    batches = get_batches(train_data, train_label, batch_size)

    # data = np.reshape(train_data, [length//batch_size, batch_size, depth]) # data:(num_batches, B, T)
    # label = np.reshape(train_label, [length//batch_size, batch_size]) # label:(num_batches, B)

    max_epochs = 1000

    bilstm = BiLSTM(vocab_size, output_size)

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    for epoch in range(max_epochs):
        batches = get_batches(train_data, train_label, batch_size)
        for data, label in batches:
            loss = bilstm.train(sess, data, label)
            # graph = bilstm.save_graph(sess)

        print(loss)
        if loss <= 1e-3:
            break

    # result = []
    y_pred, y_true = [], []
    test = get_batches(train_data, train_label, batch_size)
    for data, label in test:
        pred = bilstm.predict(sess, data)
        # result.extend(pred-label)
        y_pred.extend(pred)
        y_true.extend(label)
    # result = np.array(result)
    # result[result != 0] = 1
    # accuracy = 1 - np.sum(result)/length
    accuracy = accuracy_score(y_true, y_pred)
    print("the accuracy is %s" % accuracy)
    f1 = f1_score(y_true, y_pred, average='macro')
    print("macro-f1 score is %s" % f1)

    bilstm.save_graph(sess)
Example #8
    def result_statistics(self, total_time):
        """
        Process the test results and write them to a log file.
        :param total_time: Program execution time
        :return: Average Requests Number Per Second
        """

        logger = init_logging(self.log_location, self.log_level)
        logger.info("===============================================")
        logger.info('URL: {}'.format(self.urls))
        logger.info('Total Requests Number: {}'.format(self.total_requests))
        logger.info('Concurrent Requests Number: {}'.format(self.concurrency))
        logger.info('Total Time Cost(seconds): {}'.format(total_time))
        logger.info('Average Time Per Request(seconds): {}'.format(
            total_time / self.total_requests))
        logger.info('Average Requests Number Per Second: {}'.format(
            self.total_requests / total_time))
        if hasattr(self, 'time_location'):
            try:
                result_dict = data_process(self.time_location)
                logger.info('Max Request Time(seconds): {}'.format(
                    result_dict['max']))
                logger.info('Min Request Time(seconds): {}'.format(
                    result_dict['min']))
                logger.info('Mean Request Time(seconds): {}'.format(
                    result_dict['mean']))
                logger.info(
                    'Standard Deviation of Request Execution Time(seconds): {:5}'
                    .format(result_dict['std']))
                logger.info(
                    'Execution Time for the First 25% of Request(seconds): {}'.
                    format(result_dict['25.0%']))
                logger.info(
                    'Execution Time for the First 50% of Request(seconds): {}'.
                    format(result_dict['50.0%']))
                logger.info(
                    'Execution Time for the First 75% of Request(seconds): {}'.
                    format(result_dict['75.0%']))
            except UnboundLocalError:
                print('There is not a statistics result.')
        logger.info("===============================================")
        return self.total_requests / total_time
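The excerpt does not show what data_process(self.time_location) returns; judging from the keys read above, it is a dict of summary statistics over the per-request execution times. A minimal sketch of how such a dict could be built (an assumption for illustration, not the project's actual implementation):

import pandas as pd

def summarize_times(times):
    stats = pd.Series(times).describe()  # count, mean, std, min, 25%, 50%, 75%, max
    return {
        'max': stats['max'], 'min': stats['min'],
        'mean': stats['mean'], 'std': stats['std'],
        '25.0%': stats['25%'], '50.0%': stats['50%'], '75.0%': stats['75%'],
    }

print(summarize_times([0.12, 0.35, 0.08, 0.22, 0.17]))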
Example #9
 def set_checkins(self, path, city, n, m):
     data_processor = data_process()
     data_processor.set_basic_info(path, city)
     self.pairs_path = data_processor.pairs_path
     self.path = data_processor.path
     self.city = data_processor.city
     users = data_processor.checkins.uid.unique().tolist()
     users.sort(reverse=False)
     self.users = np.array(deepcopy(users))  # user ids sorted in ascending order
     del users
     gc.collect()
     self.m = m  # number of longitude grid cells
     self.n = n  # number of latitude grid cells
     grid_divider = grid_divide(
         data_processor.checkins.values.tolist(), self.n, self.m,
         [30.387953, -97.843911, 30.249935, -97.635460])
     self.checkins = grid_divider.divide_area_by_NN().iloc[:, [0, 1, 2, 3, 4, 5]]  # check-ins with grid id attached
     self.is_resemble = []  # records similar user pairs within each community
     self.clusterlist = []  # records community ids
     print("Number of users in the city:", len(self.users))
Example #10
import tensorflow as tf  # needed for the TF 1.x placeholder definitions below

from data_process import data_process


# Hyperparameters

# Batch Size
batch_size = 128
# RNN Size
rnn_size = 50
# Number of Layers
num_layers = 2
# Embedding Size
encoding_embedding_size = 15
decoding_embedding_size = 15

data_process = data_process()

def get_inputs():
    '''
    Model input tensors.
    '''
    inputs = tf.placeholder(tf.int32, [None, None], name='inputs')
    targets = tf.placeholder(tf.int32, [None, None], name='targets')
    learning_rate = tf.placeholder(tf.float32, name='learning_rate')

    # define the maximum target sequence length (target_sequence_length and source_sequence_length will later be passed via feed_dict)
    target_sequence_length = tf.placeholder(tf.int32, (None,), name='target_sequence_length')
    max_target_sequence_length = tf.reduce_max(target_sequence_length, name='max_target_len')
    source_sequence_length = tf.placeholder(tf.int32, (None,), name='source_sequence_length')
    print("lenghth==", target_sequence_length, "  ", max_target_sequence_length, "  ", source_sequence_length)
Example #11
from data_process import data_process
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

adata, data_after_stop, labels = data_process()
data_tr, data_te, labels_tr, labels_te = train_test_split(adata,
                                                          labels,
                                                          test_size=0.2)

countVectorizer = CountVectorizer()
data_tr = countVectorizer.fit_transform(data_tr)
X_tr = TfidfTransformer().fit_transform(data_tr.toarray()).toarray()

data_te = CountVectorizer(
    vocabulary=countVectorizer.vocabulary_).fit_transform(data_te)
X_te = TfidfTransformer().fit_transform(data_te.toarray()).toarray()

model = GaussianNB()
model.fit(X_tr, labels_tr)
print(model.score(X_te, labels_te))  # accuracy on the held-out split
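For a fuller picture than the single accuracy number, a short follow-up sketch (an addition for illustration, not part of the original snippet) using scikit-learn's classification_report:

from sklearn.metrics import classification_report

pred = model.predict(X_te)
print(classification_report(labels_te, pred))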
Example #12
def cal_precision(testVec, label):
    precision = 0
    for i in range(len(testVec)):
        test_data = testVec[i]
        result = model.classify(myTree, featLabels, test_data)
        if result == label[i]:
            precision = precision + 1
    precision = precision / len(testVec)
    return precision


if __name__ == '__main__':
    #---------------------- load and preprocess the data
    data = pd.read_csv("bank-additional-full-train.csv")
    bank_list = data.values.tolist()
    data_processor = process.data_process(bank_list=bank_list)
    #---------------------- obtain training and test samples
    data = data_processor.select(train_number, train_feature_number)
    #---------------------- build the decision tree
    model = dt.decision_tree(select_data=data)
    featLabels = []
    myTree = model.createTree(dataSet=model.select_data[0],
                              labels=model.label(model.select_data[2],
                                                 model.feature_list),
                              featLabels=featLabels)
    featLabels = featLabels[:train_feature_number]
    # print(myTree)
    createPlot(myTree)  # plot the decision tree
    #---------------------- evaluate the decision tree
    testVec = data[3][:-1]
    label = [data[3][i][-1] for i in range(len(data[3]))]
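The excerpt stops before the helper defined at the top is invoked; presumably (an assumption, not shown in the original) the evaluation finishes along these lines:

print("precision on the test split:", cal_precision(testVec, label))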
Example #13
import numpy as np
import pandas as pd
from tbats import TBATS
from data_process import data_process

if __name__ == "__main__":

    process_data = data_process()

    tbats_res = []

    p_pv = []
    temp = np.array(process_data[0])[0]
    estimator = TBATS(seasonal_periods=[7])
    fitted_model = estimator.fit(temp)
    y_1 = fitted_model.forecast(steps=7)
    temp = np.array(process_data[1])[0]
    estimator = TBATS(seasonal_periods=[7])
    fitted_model = estimator.fit(temp)
    y_2 = fitted_model.forecast(steps=7)
    for i in range(5):
        p_pv.append(0.65 * y_1[i] + 0.35 * y_2[i])
    p_6 = (temp[-2] + temp[-9]) * 0.5
    p_7 = (temp[-1] + temp[-8]) * 0.5
    p_pv.append(p_6)
    p_pv.append(p_7)
    tbats_res.append(p_pv)

    p_uv = []
    temp = np.array(process_data[2])[0]
    estimator = TBATS(seasonal_periods=[7])
Example #14
    str(BATCH_SIZE), str(EPOCHS), HISTORY_FORMAT)


# save the model and the training history
def save_model(model, history):
    if not gfile.Exists(MODEL_DIR):
        gfile.MakeDirs(MODEL_DIR)

    model.save(MODEL_FILE)

    if not gfile.Exists(HISTORY_DIR):
        gfile.MakeDirs(HISTORY_DIR)

    with open(HISTORY_FILE, 'wb') as f:
        pickle.dump(history.history, f)


if __name__ == "__main__":
    # input1 is the un-embedded data, input2 is the embedded data
    input1, input2, Y_train = data_process()
    model = model(LOSS, OPT, dropout_ALPHA)

    history = model.fit([input1, input2],
                        Y_train,
                        batch_size=BATCH_SIZE,
                        epochs=EPOCHS,
                        verbose=2,
                        validation_split=0.01)

    save_model(model, history)
Example #15
    # ip = "10.1.114.125"
    port = 6000
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    failed_count = 0
    while True:
        try:
            print("start connect to server ")
            s.connect((ip, port))
            break
        except socket.error:
            failed_count += 1
            print("fail to connect to server %d times" % failed_count)
            if failed_count == 100:
                sys.exit(-11)

    read_data = data_process.data_process()
    info, data_ans, data_pred = read_data.read_json()
    login = Tk()
    # main_gui = Tk()

    login.title('Login')
    # set the window size
    login.geometry('210x200')

    # build the GUI login form
    Label(login, text="User Login").grid(row=0, column=0, columnspan=2)
    Label(login, text="Username").grid(row=1, column=0)
    name = Entry(login)
    name.grid(row=1, column=1)
    Label(login, text="Password:").grid(row=2, column=0)
    passWord = Entry(login, show='●')
Example #16
File: run.py  Project: xiaobing007/-PV-
import numpy as np
import pandas as pd
from tbats import TBATS
from params import params_0
from linear_model import model_fit_predict
from data_process import data_process

if __name__ == "__main__":
    # data preprocessing
    data_process()

    data_dir = ['p_pv', 'p_uv', 'r_pv', 'r_uv']

    # global linear model
    linear_res = []
    for idx, name in enumerate(data_dir):
        y_forecasted = model_fit_predict(name)
        linear_res.append(y_forecasted)
    linear_res = pd.DataFrame(list(linear_res)).T
    linear_res.columns = ['p_pv', 'p_uv', 'r_pv', 'r_uv']

    # traditional decomposition model
    tbats_res = []
    for idx, name in enumerate(data_dir):
        data = pd.read_csv('processed_data/' + name + '.csv')[[name]].T
        print(idx)
        data = np.array(data)[0]
        estimator = TBATS(seasonal_periods=params_0['seasonal_periods'][idx])
        fitted_model = estimator.fit(data)
        y_forecasted = fitted_model.forecast(steps=7)
        y_forecasted = [x * params_0['after_rate'][idx] for x in y_forecasted]
Example #17
#########		
#The main loop
#########		

		
#The sensor reads the finger print in the desired format 
#and store it in a file the name of which is stored
	
print "Getting fingerprint..."
file = sensor.get("LES");
print "Fingerprint received"

#The data is processed differently for enrollment and query

if len(argv) > 2 and 'e' not in argv[1]:
    to_send = data_process.data_process(file)

else:
    to_send = data_process.data_process(file, True)

i = 0

#For every SMS to send

while i < len(to_send):
    print(i + 1)
    resend = False

    # Clear previous I/O stream

    tty.flush()
Example #18
# Author: jingyile
# Date: 2020/4/3 2:10 PM
# Description: dataset splitting

from data_process import data_process
from random import sample
data = data_process()  # load the cleaned dataset


def trainTestSplit(data=data, n=1):
    data['realIP'] = data['realIP'].apply(str)  # convert IP addresses to strings

    ipCount = data['realIP'].value_counts()  # count page views per user
    reaIP = ipCount[ipCount > n].index  # IPs of users with more than n page views
    ipTrain = sample(list(reaIP), int(len(reaIP) * 0.8))  # training-set users
    ipTest = [i for i in list(reaIP) if i not in ipTrain]  # test-set users

    index_tr = [i in ipTrain for i in data['realIP']]  # browsing-record index for training users
    index_te = [i in ipTest for i in data['realIP']]  # browsing-record index for test users

    dataTrain = data[index_tr]  # training data
    dataTest = data[index_te]  # test data
    return dataTrain, dataTest
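A minimal usage sketch of the splitter above (the threshold n=2 is an arbitrary choice for illustration):

data_tr, data_te = trainTestSplit(data, n=2)
print(len(data_tr), len(data_te))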
Example #19
File: wrapper.py  Project: Z-Gu/GSM_Project
#########

#Use try-except to capture and terminate on 'Ctrl-C' signal

try:
    while True:

        #The sensor reads the finger print in the desired format
        #and store it in a file the name of which is stored

        print "Getting fingerprint..."
        file = sensor.get(form)
        print "Fingerprint received"

        #The data is processed according to the format

        to_send = data_process.data_process(file, form, method)
        to_send = to_send.split(' ')
        for i in to_send:
            while (not send(i)):
                continue

        #Send the data

#		data_transmit.send(tty, to_send, form)

        break

except KeyboardInterrupt:
    pass
Example #20
def sendData_to_trama(DeviceId, DeviceSequence, Mac_slave):
    print("\n>>>>>>>>>>>>>>>>>>>sendData_to_trama!!!!\n")
    #time.sleep(5)
    trama = ""
    Mac_master = "24f7b5e350cc"  #MAC ADDRESS DO MASTER NECESSARIO ALTERAR SE O MASTER TIVER OUTRO MAC ADDRESS
    TramaType = "0"  #0equivalent to configuratio 1 - equivalent to broadcast

    MasterId = DeviceId[0][0:5]
    SlaveId = DeviceId[0][5:8]
    print("MasterId: ", MasterId)
    print("SlaveId: ", SlaveId)

    # Read the DEVICE configuration values from the JSON file
    with open('DeviceConfiguration.json', 'r') as data_device:
        Device_data = json.load(data_device)
        SIZE_OFF_DEVICE_DATA = (len(Device_data['DeviceConfiguration']))

# Same for the SLAVE configurations
    with open('SlaveConfiguration.json', 'r') as data_slave:
        Slave_data = json.load(data_slave)
        SIZE_OFF_SLAVE_DATA = len(Slave_data['SlaveConfiguration'])

# Get the ActuationParameter for the SLAVE in question:
    ActuationParameter = []
    for x in range(0, SIZE_OFF_SLAVE_DATA):
        if SlaveId == Slave_data['SlaveConfiguration'][x][
                'SlaveId'] and MasterId == Slave_data['SlaveConfiguration'][x][
                    'MasterId']:
            ActuationParameter.append(
                Slave_data['SlaveConfiguration'][x]['ActuationParameter'])

    str_actuation = str(ActuationParameter[-1])

    SlopeConvertion = []
    InterceptConvertion = []
    DeviceType = []
    ReadType = []
    trama = ""
    for i in range(0, len(DeviceId)):
        #print("DeviceID: ",DeviceId[i])
        Slope = []
        Intercept = []
        Device_type = []
        Read_type = []
        # Load all DEVICE configurations
        for x in range(0, SIZE_OFF_DEVICE_DATA):
            # Use only the most recent configuration:
            if (DeviceId[i] == Device_data['DeviceConfiguration'][x]
                ['DeviceId']):
                Slope.append(
                    Device_data['DeviceConfiguration'][x]['SlopeConvertion'])
                Intercept.append(Device_data['DeviceConfiguration'][x]
                                 ['InterceptConvertion'])
                Device_type.append(
                    Device_data['DeviceConfiguration'][x]['DeviceType'])
                Read_type.append(
                    Device_data['DeviceConfiguration'][x]['ReadType'])

        # Take the last array position, i.e. the most recent data
        if len(Slope) != 0:
            SlopeConvertion.append(Slope[-1])
        if len(Intercept) != 0:
            InterceptConvertion.append(Intercept[-1])
        if len(Device_type) != 0:
            DeviceType.append(Device_type[-1])
        if len(Read_type) != 0:
            ReadType.append(Read_type[-1])
    # print(SlopeConvertion)
    # print(InterceptConvertion)
    # print(DeviceType)
    # print(ReadType)

# Concatenate the array values into strings, i.e. convert the parameters into the frame (TRAMA):
    str_Slope = ""
    str_Inter = ""
    str_DeviceType = ""
    str_ReadType = ""
    for j in range(0, len(SlopeConvertion)):
        str_Slope = str_Slope + SlopeConvertion[j]
    for j in range(0, len(InterceptConvertion)):
        str_Inter = str_Inter + InterceptConvertion[j]
    for j in range(0, len(DeviceType)):
        str_DeviceType = str_DeviceType + DeviceType[j]
    str_DeviceType = str_DeviceType.replace(" ", "")
    for j in range(0, len(ReadType)):
        str_ReadType = str_ReadType + ReadType[j]
    # print(str_DeviceType)

# Zero-padding to reach the defined lengths:
    if len(DeviceSequence) != 60:
        DeviceSequence = zerofill(DeviceSequence, 60)
    if len(str_ReadType) != 15:
        str_ReadType = zerofill(str_ReadType, 15)
    if len(str_DeviceType) != 45:
        str_DeviceType = zerofill(str_DeviceType, 45)
    if len(str_Slope) != 150:
        str_Slope = zerofill(str_Slope, 150)
    if len(str_Inter) != 150:
        str_Inter = zerofill(str_Inter, 150)
    str_rulle = str_actuation
    if len(str_actuation) != 20:
        str_rulle = zerofill(str_actuation, 20)

    # configuration frame
    print("rulle: ", str_rulle)
    trama = Mac_slave + TramaType + MasterId + SlaveId + DeviceSequence + str_ReadType + str_DeviceType + str_Slope + str_Inter + str_rulle
    print(len(trama), trama)

    nowd = datetime.now()
    nowd = nowd.strftime("%d/%m/%Y %H:%M:%S")
    print(">>>> READING SLAVES....", nowd)

    # Send the frame (TRAMA) to the MASTER microcontroller:
    # Serial communication with the microcontroller, and reception of whatever it has to send back
    response = TradeAntenna(trama)
    str_response = str(response)
    print(str_response)
    recv_trama = ""
    for x in range(0, len(response)):
        recv_trama = (response[x])
    print("recv_trama:", recv_trama)
    print(len(str_response))
    if len(recv_trama) == 297:
        data_process(
            recv_trama
        )  # Process all the information received from the SLAVE, generate alarms, broadcast, and write the data to DataSeries.json
    else:
        print("SLAVE NOT RESPONDING -> SIZE: ", len(recv_trama))

    print("\nwaiting for next slave<<<<<<<<<<<<<\n")
parser.add_argument("--do_predict", type=ast.literal_eval, default=True, help="do predict")
parser.add_argument("--do_model", type=str, default="trigger", choices=["trigger", "role"], help="trigger or role")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.1, help="Warmup proportion params for warmup strategy")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--eval_step", type=int, default=200, help="eval step")
parser.add_argument("--model_save_step", type=int, default=3000, help="model save step")
parser.add_argument("--batch_size", type=int, default=32, help="Total examples' number in batch for training.")
parser.add_argument("--add_crf", type=ast.literal_eval, default=True, help="add crf")
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
args = parser.parse_args()
# yapf: enable.

# Preprocess the data first and save it to disk
train_data = data_process(args.train_data, args.do_model)  # process the training data
dev_data = data_process(args.dev_data, args.do_model)  # process the dev data
test_data = data_process(args.test_data, args.do_model)
predict_sents, predict_data = data_process(args.predict_data,
                                           args.do_model,
                                           is_predict=True)

write_by_lines("{}/{}_train.tsv".format(args.data_dir, args.do_model),
               train_data)
write_by_lines("{}/{}_dev.tsv".format(args.data_dir, args.do_model), dev_data)
write_by_lines("{}/{}_test.tsv".format(args.data_dir, args.do_model),
               test_data)
write_by_lines("{}/{}_predict.tsv".format(args.data_dir, args.do_model),
               predict_data)

schema_labels = schema_process(args.schema_path, args.do_model)
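write_by_lines is not shown in this excerpt; a minimal sketch of what such a helper presumably does, judging from how it is called above (an assumption, not the project's actual code):

def write_by_lines(path, data):
    """Write one string per line to path, UTF-8 encoded."""
    with open(path, "w", encoding="utf-8") as f:
        f.write("\n".join(data) + "\n")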
Example #22
import requests
import numpy as np
import json
import csv
import time
import urllib3
import tensorflow as tf
from data_process import data_process
import boxx
import pickle
'''This predictor is a binary predictor that predicts which team will win a match;
its shortcoming is that it cannot provide a win-rate (probability) prediction.'''

data_process_1 = data_process()

# create dicts
heros_id = np.arange(115)
heroes_info_dict = data_process_1.get_hero_data()
id_list = []
for i in range(115):
    id_list.append(int(heroes_info_dict[i]['id']))
id_dict = dict(zip(id_list, heros_id))
print(id_dict)

# read match data
heros_data, results_data = data_process_1.process_data('data_3.csv')

results_data = np.reshape(results_data, [40000, 2])
# map the heros data to a binary matrix
heros_features = data_process_1.map_heros_data_matrix(heros_data, id_dict)
print(heros_features)
Example #23
def typhoon_history():
    return jsonify(data_process(url))
Example #24
        checkpoint,
        directory=config.configs_checkpoint()['directory'],
        max_to_keep=config.configs_checkpoint()['max_to_keep'])
    if manager.latest_checkpoint:
        checkpoint.restore(manager.latest_checkpoint)
    # training loop
    for epoch in range(1, epochs + 1):
        start = time.time()
        loss = train_sample(inputs, labels, label_length, optimizer, model)
        end = time.time()
        te = end - start
        # every 5 epochs, save a checkpoint and print the loss
        if epoch % 5 == 0:
            manager.save()
            print("Epoch %d/%d" % (epoch, epochs))
            print(
                "%d/%d [==============================] - %ds %dms/step - loss: %.4f"
                % (inputs.shape[0], inputs.shape[0], te,
                   te * 1000 / inputs.shape[0], loss))


if __name__ == "__main__":
    model = DS2()
    epochs = config.configs_train()["train_epochs"]
    data_path = config.configs_train()["data_path"]
    batch_size = config.configs_train()["batch_size"]
    inputs, labels, label_length = data_process(data_path=data_path,
                                                batch_size=batch_size,
                                                if_train_or_test='train')
    optimizer = tf.keras.optimizers.Adam()
    train(model, optimizer, inputs, labels, label_length, epochs)
Example #25
import sys
import os
import data_process


if __name__=="__main__":
	filepath="/Users/mengqwang/Documents/tMall/clean_t_alibaba_data.csv"
	ratio0=int(sys.argv[1])
	ratio1=int(sys.argv[2])
	ratio2=int(sys.argv[3])
	ratio3=int(sys.argv[4])
	ratio4=int(sys.argv[5])
	dp=data_process.data_process(filepath,ratio0,ratio1,ratio2,ratio3,ratio4)
	dp.action2rate()
	dp.dataOutput()
Example #26
def classify(inputTree, featLabels, testVec):
    firstStr = next(iter(inputTree))             # get the root node of the decision tree
    secondDict = inputTree[firstStr]             # next sub-dictionary
    featIndex = featLabels.index(firstStr)
    classLabel = None                            # default when no branch matches
    for key in secondDict.keys():
        if testVec[featIndex] == key:
            if type(secondDict[key]).__name__ == 'dict':
                classLabel = classify(secondDict[key], featLabels, testVec)
            else:
                classLabel = secondDict[key]
    return classLabel

if __name__ == '__main__':
    data = pd.read_csv("bank-additional-full.csv")
    bank_list = data.values.tolist()
    data_processor = process.data_process(bank_list=bank_list)
    data = data_processor.select(10000, 2)
    dataSet, labels = data[0], label(data[2], feature_list)
    featLabels = []
    myTree = createTree(dataSet, labels, featLabels)
    print(myTree)
    testVec = data[3][:-1]
    label = [data[3][i][-1] for i in range(len(data[3]))]
    precision = 0
    for i in range(len(testVec)):
        result = classify(myTree, featLabels, testVec[i])
        if result == label[i]:
            print('a')
            precision = precision + 1

    precision = precision / len(testVec)
Example #27
# te.to_excel('./te.xlsx')


# === build the item similarity matrix ======
cor = jaccard(te)  # Jaccard similarity coefficient
cor = pd.DataFrame(cor, index=urlTrain, columns=urlTrain)
# cor.to_excel('./cor.xlsx')

# build a dict of the URLs browsed by each test-set user
ipTest = list(set(data_te['realIP']))
dic_te = {ip: list(data_te.loc[data_te['realIP'] == ip, 'fullURL']) for ip in ipTest}

rem = pd.DataFrame(index=range(len(data_te)), columns=['IP', 'url', 'rec1','rec2','rec3','rec4','rec5','recall','precision','R','anum'])
rem['IP'] = list(data_te['realIP'])
rem['url'] = list(data_te['fullURL'])
index = data_process()['fullURL'].value_counts()
for i in rem.index:
    rnum = 0  # number of recommended items the user is actually interested in
    anum = len(dic_te[rem.loc[i, 'IP']])  # number of pages the user actually visited, i.e. pages the user is genuinely interested in
    rem.loc[i, 'anum'] = anum
    if rem.loc[i, 'url'] in urlTrain:
        rem.loc[i, 'R'] = 1
        index1 = cor.loc[rem.loc[i, 'url']].argmax()
        rem.loc[i, 'rec1'] = cor.index[index1]         # recommended URL 1
        index2 = cor.loc[rem.loc[i, 'url']].argmax() - 1
        rem.loc[i, 'rec2'] = cor.index[index2]         # recommended URL 2
        index3 = cor.loc[rem.loc[i, 'url']].argmax() - 2
        rem.loc[i, 'rec3'] = cor.index[index3]         # recommended URL 3
        index4 = cor.loc[rem.loc[i, 'url']].argmax() - 3
        rem.loc[i, 'rec4'] = cor.index[index4]         # recommended URL 4
        index5 = cor.loc[rem.loc[i, 'url']].argmax() - 4
Example #28
    file_utils.mkdir_path(log_foloder_name)
    log_utils.log_config(log_foloder_name + LOG_NAME)

    # 1. Initialize basic runtime configuration
    config.init_config(root_path, data_type, get_type)
    config_dict = config.get_config(root_path, data_type, curr_date)

    # 2. Start data collection
    run_result = data_list_collection.data_collection(config_dict)
    if run_result < 1:
        logging.error("[1] Step failed, aborting: %s" % (run_result))

        # clear today's folder
        data_list_folder_name = config_dict["data_list_folder_name"]
        if file_utils.clear_folder(data_list_folder_name):
            logging.info("Cleared folder: %s" % (data_list_folder_name))

        logging.error("Data collection for today [%s] failed, please rerun the collection program!" % (curr_date))
        sys.exit(0)

    # 3. Data analysis
    data_process.data_process(config_dict)

    # 4. Collect newly added data
    run_result = data_collection_get_new_data.get_new_data(config_dict)
    if run_result < 1:
        logging.error("[2] Step failed, aborting: %s" % (run_result))
        sys.exit(0)

    # 5. Check the collected data
    check_data.check_data(curr_date)
Example #29
def main():
    filename='Eur-Lex'
    path='../'+filename+'/' + filename 
    # small checks
    # al=min_max_variance_active_learner(path)
    # al.active_select()
    # return
    """with open(path+'.seed.0','r') as fp:
        line=fp.readline()
    print len(line.strip().split(' '))
    return
    l=learner(path)
    l.read_sparse_data(path+'.train.norm',nfeatures=1837)
    
    sparse_list_data=[1,2,3,4,5,6,7,8]
    sparse_list_row=[0,1,2,3,4,5,6,7]
    sparse_list_col=[0,1,0,1,0,1,0,1]
    Q=csr_matrix((np.array(sparse_list_data),(np.array(sparse_list_row),np.array(sparse_list_col))),shape=(8,2))
    print Q.todense()
    Q=normalize(Q, axis=0, norm='l2')
    print Q.todense()
    return
    """
    # change tradeoff function calls 
    if len(sys.argv)>1:
        input_text = sys.argv[1]
        if input_text in ['tradeoff','check']:
            input_method = sys.argv[2]
        
    else:
        input_text='generate_seed'#tradeoff'#''#'random'#'random'#'check_minmax_l2distance'#'preprocess'
        input_method='random'#'multiple_seed'#'maxquery_vary_delta'
    
    #**********************
    #***Tradeoff Section***
    #**********************
    if input_text in 'tradeoff':
        seed_file_idx=['0']#[str(i) for i in range(5)]
        seed_files=[path+'.seed.'+idx for idx in seed_file_idx]
        max_iter=2
        #**********************************
        if input_method in 'max_query_vary_delta':
            set_of_delta=list(np.arange(.1,.2,.01))
            for fseed in seed_files:
                for delta in set_of_delta:
                    active_learner_general(max_query_active_learner(path,delta=delta), fseed, fsave_ext='.delta_'+str(delta), max_iter=max_iter)
            return
        #*********************************
        if input_method in 'minmax':
            al= min_max_inner_prod(path)
        if input_method in 'minsum':#
            al= min_sum_inner_prod(path)
        if input_method in 'minmin':#
            al= min_min_inner_prod(path)
        if input_method in 'maxquery':#
            delta=.1
            save_fig_interval=50
            al=max_query_active_learner(path,delta=delta, save_fig_interval=save_fig_interval)
        if input_method in 'minmaxvar':
            al=min_max_variance_active_learner(path, count=5)
        if input_method in 'random':#
            al= random_active_learner(path)
        run_tradeoff_exp(al,seed_files,max_iter=max_iter)
    #**********************************************************
    #****************CHECK*************************************
    #**********************************************************
    if input_text in 'check':
        if input_method in 'tuning':
            l=learner(path)
            l.tune()
        if input_method in 'popular_n_rare':
            l=learner(' ')
            acc = l.train_test()
            with open('acc','a') as fp : 
                fp.write(' '.join([str(acc_val) for acc_val in acc])+'\n')
            #d=data_process()
            #d.get_seed(frac=.5,ftrain='train')
            #d.get_label()
            #l.read_out_file()
            #l.decide_popular_n_rare_labels()
            #l.get_acc()
            return
            """
            X,Y,Q=generate_sparse_data(nsamples=5,nlabels=2,nfeatures=2,density=.75)
            al=min_max_inner_prod(path)
            al.Xtrain=X 
            al.Ytrain=Y 
            al.Q=Q 
            for i in range(5):
                al.active_select()
            return"""
        if input_method in 'min_max_l2distance':
            check_min_max_dist_outer(path)    
        if input_method in 'ball_tree':
            X,Y,Q = generate_sparse_data(nsamples=8,nlabels=2,nfeatures=2,density=.75)
            al=min_max_l2distance_lower(path,leaf_size=2)
            al.Xtrain=X 
            al.Ytrain=Y 
            al.Q=Q
            al.create_ball_tree()
            al.brute_force_l2_norm(X,np.array(range(X.shape[0])))
            print('************\n\ndoing active select\n\n *******************')
            al.active_select()
            #al.show_ball_tree_n_points()
        if input_method in 'inner_prod':# checking is left
            k=1
            l=learner(path)
            l.read_metadata()
            # change ftrain  to ftrain_src
            l.ftrain=l.ftrain_src
            # run train and test * create Q, create out file , decide a rank value
            l.train_test(k=k,acc_method='popular_n_rare')# define rank, default k=1
            # read test file, 
            l.Ytest,l.Xtest=l.read_sparse_data(l.ftest, l.nfeatures)
            # compute out file values
            pred_computed = l.compute_k_scores(l.Xtest, l.Q, k=k) # change the output if required later
            Ytest_comp=[]
            score_comp=[]
            for sample_score in pred_computed:
                Ytest_comp.append(sample_score[0][0])
                score_comp.append(sample_score[0][1])
            # compare and report the result
            Ytest_pred, score_pred = l.read_out_file()
            for label1,label2,score1,score2 in zip(Ytest_comp,Ytest_pred, score_comp, score_pred):
                print('label1:'+str(label1)+',label2:'+str(label2)+',score1:'+str(score1)+',score2:'+str(score2)+'\n')
    #**********************************************************
    #****************PLOT*************************************
    #**********************************************************
    if input_text in 'plot':
        if input_method in 'active_score_per_iter':
            path='../results/exp7rep/bibtex.maxquery.count.'
            nfiles=40
            plot_active_score_per_iter(path, nfiles)
        if input_method in 'samples per labels':
            l=learner(path)
            l.read_metadata()
            Ytrain=l.read_sparse_data(l.ftrain_src,l.nfeatures)[0]
            l.find_samples_per_label(l.nlabels,Ytrain)
        if input_method in 'single_seed':
            plot_single_seed()
        if input_method in 'multiple_seed':
            seed_set=[str(i) for i in range(5) ]
            #seed_set=['']
            path='../results/testing/bibtex.acc.'  
            method_list=['minmaxdev','random']
            dp = data_process()
            dp.readfiles_outer(path, method_list, seed_set, num_of_acc=2,fsave=path) # modify
        if input_method in 'ad-hoc':
            path='../results/exp5/e/bibtex.acc.'
            method_list=['maxquery','random']
            file_list = [path+m for m in method_list]
            plot_your_choice(2 , file_list, method_list)
    #**********************************************************
    #****************OTHER*************************************
    #**********************************************************
    if input_text in 'preprocess':
        l = learner(path)
        list_of_files=[path+ext for ext in ['.train', '.test', '.heldout']]
        l.read_metadata()
        l.read_normalize_write_sparse_files(list_of_files,l.nfeatures)
    if input_text in 'min_max_l2distance':
        print('minmax l2 distance')
        leaf_size=20
        bound='lower'
        minmaxl2=min_max_l2distance(path,leaf_size,bound)
        fseed=path+'.seed.0'
        active_learner_general(minmaxl2,fseed)
        #active_learner_general(random_al,frac_seed)
    if input_text in 'generate_seed':
        #print 'seed generation'
        start_idx=0
        nseed_required=1#10
        gen_seed=data_process()
        frac_seed=.1
        ftrain=path+'.train.norm'
        for i in range(nseed_required):
            fseed=path+'.seed.'+str(i+start_idx)
            #print fseed
            #list_of_labels,nlabels =gen_seed.get_label(ftrain)
            seeds=gen_seed.get_seed(frac_seed,ftrain)# how to get nlabels
            #print len(seeds)
            
            #with open(fseed,'w') as fp:
            #    fp.write(' '.join(str(s) for s in seeds))
                
        #seed_generator.get_seed(frac_seed,fseed) 
    if input_text in 'changing_seed':# incomplete
        frac_l=0
        frac_u=0
        frac_diff=0
        gen_seed=data_process()
        for frac in range(frac_l,frac_u, frac_diff):
            seeds=gen_seed.get_seed(frac_seed,ftrain)
            with open(fseed,'w') as fp:
                fp.write(' '.join(str(s) for s in seeds))
Example #30
from data_process import data_process
from sklearn.feature_extraction.text import CountVectorizer,TfidfTransformer
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix

vectorizer = CountVectorizer()      # count-vectorization function
transformer = TfidfTransformer()    # TF-IDF transformation function

adata, labels, data_after_stop = data_process(number_p=10000, number_n=10000)   # load the data

data_tr, data_te, labels_tr, labels_te = \
    train_test_split(adata, labels, test_size=0.2)   # split the data

word_vec_tr = vectorizer.fit_transform(data_tr)      # process the training-set samples
tfidf_tr = transformer.fit_transform(word_vec_tr)

word_vec_te = CountVectorizer(vocabulary=vectorizer.vocabulary_).fit_transform(data_te)
tfidf_te = transformer.fit_transform(word_vec_te)    # process the test-set samples

model = MultinomialNB().fit(tfidf_tr, labels_tr)      # train the model
pre = model.predict(tfidf_te)

print(sum(labels_te == pre) / len(labels_te))         # accuracy
print(classification_report(labels_te, pre))
print(confusion_matrix(labels_te, pre))

Example #31

# prediction
def predict(test, pre_model):
    predict_result = pre_model.predict(test)
    predict_result = list(predict_result.reshape(len(predict_result), ))

    # per the competition rules, each label must be either 0 or 1
    # result=[]
    # for i in predict_result:
    # 	if i < 0.5:
    # 		i = 0
    # 	else:
    # 		i = 1
    # 	result.append(i)

    return predict_result


# save as CSV
def save_csv(sid, result):
    result_table = pd.DataFrame({"sid": sid, "label": result})
    result_table.to_csv("predict.csv", index=False)


if __name__ == "__main__":
    pre_model = load_model()
    input1, input2, sid = data_process(train=False)
    result = predict([input1, input2], pre_model)
    save_csv(sid, result)
Example #32
import pandas as pd  # needed for pd.DataFrame below

from data_process import data_process
# trainTestSplit (used below) is expected to be defined or imported elsewhere in this module

data_tr, data_te = trainTestSplit(n=3)

# === extract the training-set users' IPs and browsed URLs ======
ipTrain = list(set(data_tr['realIP']))
urlTrain = list(set(data_tr['fullURL']))

# build a dict of the URLs browsed by each test-set user
ipTest = list(set(data_te['realIP']))
dic_te = {
    ip: list(data_te.loc[data_te['realIP'] == ip, 'fullURL'])
    for ip in ipTest
}

index = data_process()
index2 = index['fullURL'].value_counts()  # popular URLs  (<class 'pandas.core.series.Series'>)
rem = pd.DataFrame(index=range(len(data_te)),
                   columns=[
                       'IP', 'url', 'rec1', 'rec2', 'rec3', 'rec4', 'rec5',
                       'recall', 'precision'
                   ])
rem['IP'] = list(data_te['realIP'])
rem['url'] = list(data_te['fullURL'])
for i in rem.index:
    index3 = index2  # so the current user does not interfere with other users
    rnum = 0  # number of recommended items the user is actually interested in
    anum = len(dic_te[rem.loc[i, 'IP']])  # number of pages the user actually visited, i.e. pages the user is genuinely interested in
    if rem.loc[i, 'url'] in index3:  # check whether the current URL is in the candidate set
        # del index3[rem.loc[i, 'url']]  # deleting here would also remove it from index2 (same underlying object)...
Example #33
#########

# Use try-except to capture and terminate on 'Ctrl-C' signal

try:
    while True:

        # The sensor reads the finger print in the desired format
        # and store it in a file the name of which is stored

        print "Getting fingerprint..."
        file = sensor.get(form)
        print "Fingerprint received"

        # The data is processed according to the format

        to_send = data_process.data_process(file, form, method)
        to_send = to_send.split(" ")
        for i in to_send:
            while not send(i):
                continue

                # Send the data

        # 		data_transmit.send(tty, to_send, form)

        break

except KeyboardInterrupt:
    pass