Example no. 1
def main():
    with open('../result/hudong/dataTables.data', 'r') as f:
        dataTables = pickle.load(f)

    auto_marks = []
    for nn, dataTable in enumerate(dataTables):
        mentions = []
        for i in xrange(dataTable.row):
            for j in xrange(dataTable.col):
                mentions.append(dataTable[i][j])
        auto_mark = MachineLearning.main(mentions, dataTable.row,
                                         dataTable.col)
        auto_marks.append(auto_mark)
        #print auto_marks
        print u'第%d个表格标注完成' % nn  # "annotation of table %d finished"

    with open('../result/hudong/taiyun/auto_mark.data', 'w') as f:
        json.dump(auto_marks, f)
    #-------------------------------------------------------------------------------

    with open('../result/baidu/dataTables.data', 'r') as f:
        dataTables = pickle.load(f)

    auto_marks = []
    for nn, dataTable in enumerate(dataTables):
        mentions = []
        for i in xrange(dataTable.row):
            for j in xrange(dataTable.col):
                mentions.append(dataTable[i][j])
        auto_mark = MachineLearning.main(mentions, dataTable.row,
                                         dataTable.col)
        auto_marks.append(auto_mark)
        print u'第%d个表格标注完成' % nn  # "annotation of table %d finished"

    with open('../result/baidu/taiyun/auto_mark.data', 'w') as f:
        json.dump(auto_marks, f)

    #-------------------------------------------------------------------------------

    with open('../result/wiki/dataTables.data', 'r') as f:
        dataTables = pickle.load(f)

    auto_marks = []
    for nn, dataTable in enumerate(dataTables):
        mentions = []
        for i in xrange(dataTable.row):
            for j in xrange(dataTable.col):
                mentions.append(dataTable[i][j])
        auto_mark = MachineLearning.main(mentions, dataTable.row,
                                         dataTable.col)
        auto_marks.append(auto_mark)
        print u'第%d个表格标注完成' % nn  # "annotation of table %d finished"

    with open('../result/wiki/taiyun/auto_mark.data', 'w') as f:
        json.dump(auto_marks, f)
Example no. 2
def svm_machine_learn_no_model(file_twin,
                               file_kink,
                               file_cluster,
                               filetrace_twin,
                               filetrace_kink,
                               filetrace_cluster,
                               filetrace,
                               filename_mark,
                               smooth=0):
    # Read the twin data file
    # Used when there is no trained model to load
    print('svm_machine_learn with no model is running')
    f_twin = Screening.read_file(file_twin)
    # Extract the frequency-domain vector of every useful signal
    result1 = Screening.data_fre(f_twin, filetrace_twin, smooth)
    fre_range = result1[0]
    fre_twin = result1[1]

    # Read the kink data file
    f_kink = Screening.read_file(file_kink)
    result2 = Screening.data_fre(
        f_kink, filetrace_kink,
        smooth)  # [[frequency range], [[fre1],[fre2],[fre3]...[fre-n]]]
    fre_kink = result2[1]

    # Read the data file that needs to be classified
    f = Screening.read_file(file_cluster)
    result3 = Screening.data_fre(f, filetrace_cluster, smooth)
    fre = result3[1]

    # SVM learning
    label_fre = MachineLearning.skl_svm(
        fre_twin, fre_kink, fre, filetrace,
        filename_mark)  # [cluster_label, data_fre]
    result = MachineLearning.ave_fre(
        label_fre, fre_range)  # [[frequency range], [[fre1_twin],[fre2_kink]]]
    temp = [['fre-range', 'twin', 'kink']]
    array = np.array([result[0], result[1][0], result[1][1]])
    array = array.T  # column 1: fre-range, column 2: twin, column 3: kink
    arr = array.tolist()
    for i in arr:
        temp.append(i)
    # Save the file
    filename = 'SVM_averange_frequency.csv'
    f = filetrace + '\\' + 'File after Processing' + '\\' + filename
    np.savetxt(f, temp, fmt='%s', delimiter=',')
    print('SVM_averange_frequency File made')

    # Save the file with the cluster label of each signal
    filename = 'SVM_label.csv'
    f = filetrace + '\\' + 'File after Processing' + '\\' + filename
    np.savetxt(f, label_fre[0], fmt='%s', delimiter=',')
    print('SVM_label File made')
    return label_fre[0]
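
A minimal usage sketch for the function above, assuming the Screening and MachineLearning modules are importable as in the original project; every path and the filename mark below are hypothetical placeholders, not values from the source.

if __name__ == '__main__':
    # Hypothetical input files and output root (the root is expected to contain a
    # 'File after Processing' subfolder, as the save paths above show)
    labels = svm_machine_learn_no_model(file_twin=r'data\twin.txt',
                                        file_kink=r'data\kink.txt',
                                        file_cluster=r'data\unknown.txt',
                                        filetrace_twin=r'data\twin',
                                        filetrace_kink=r'data\kink',
                                        filetrace_cluster=r'data\unknown',
                                        filetrace=r'C:\results',
                                        filename_mark='_run1',
                                        smooth=0)
    print('%d signals classified' % len(labels))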
Example no. 3
def main():
	with open('../result/hudong/dataTables.data','r') as f:
		dataTables = pickle.load(f)

	auto_marks = []
	for nn, dataTable in enumerate(dataTables):
		mentions = []
		for i in xrange(dataTable.row):
			for j in xrange(dataTable.col):
				mentions.append(dataTable[i][j])
		auto_mark = MachineLearning.main(mentions, dataTable.row, dataTable.col)
		auto_marks.append(auto_mark)
		#print auto_marks
		print u'第%d个表格标注完成'%nn  # "annotation of table %d finished"

	with open('../result/hudong/taiyun/auto_mark.data','w') as f:
		json.dump(auto_marks, f)
	#-------------------------------------------------------------------------------

	with open('../result/baidu/dataTables.data','r') as f:
		dataTables = pickle.load(f)

	auto_marks = []
	for nn, dataTable in enumerate(dataTables):
		mentions = []
		for i in xrange(dataTable.row):
			for j in xrange(dataTable.col):
				mentions.append(dataTable[i][j])
		auto_mark = MachineLearning.main(mentions, dataTable.row, dataTable.col)
		auto_marks.append(auto_mark)
		print u'第%d个表格标注完成'%nn  # "annotation of table %d finished"

	with open('../result/baidu/taiyun/auto_mark.data','w') as f:
		json.dump(auto_marks, f)

	#-------------------------------------------------------------------------------

	with open('../result/wiki/dataTables.data','r') as f:
		dataTables = pickle.load(f)

	auto_marks = []
	for nn, dataTable in enumerate(dataTables):
		mentions = []
		for i in xrange(dataTable.row):
			for j in xrange(dataTable.col):
				mentions.append(dataTable[i][j])
		auto_mark = MachineLearning.main(mentions, dataTable.row, dataTable.col)
		auto_marks.append(auto_mark)
		print u'第%d个表格标注完成'%nn  # "annotation of table %d finished"

	with open('../result/wiki/taiyun/auto_mark.data','w') as f:
		json.dump(auto_marks, f)
Example no. 4
def MakeModel():
    checkpoint_path = '/tmp/' + str(datetime.datetime.now())

    optimizer = keras.optimizers.Adam(lr=0.0006,
                                      beta_1=0.96,
                                      beta_2=0.99999,
                                      epsilon=1e-2)

    descriptor = 'Final training of model'
    filefmt = 'weights.Epoch-{epoch:03d};Loss-{val_loss:.6f}.hdf5'

    #Model = ML.SimpleModel(Train_data,optimizer)
    Model = ML.ModularModel(Train_data, optimizer, layers=4, nodes=4 * 256)

    History = ML.TrainModel(Model,
                            Train_data,
                            Train_label,
                            EPOCHS=500,
                            min_delta=0.0,
                            patience=20,
                            PERIOD=0,
                            BATCH=45,
                            val_data=tuple([Test_data, Test_label]),
                            checkpoint_path=checkpoint_path,
                            file_name=filefmt,
                            Descriptor=descriptor)

    ML.PlotHistory(
        History,
        save_path=checkpoint_path.replace('.', ':').replace(':', '-') + '/')
    Predictions = ML.Predict(Model, Test_data)
    grph.PlotHistory(
        Predictions, Test_label,
        os.getcwd().replace('\\', '/') +
        checkpoint_path.replace('.', ':').replace(':', '-') + '/')
    grph.PlotHistory2018(
        Predictions, Test_label,
        os.getcwd().replace('\\', '/') +
        checkpoint_path.replace('.', ':').replace(':', '-') + '/')
    grph.PlotHistoryDiff(
        Predictions, Test_label,
        os.getcwd().replace('\\', '/') +
        checkpoint_path.replace('.', ':').replace(':', '-') + '/')
    grph.PlotHistory2018percent(
        Predictions, Test_label,
        os.getcwd().replace('\\', '/') +
        checkpoint_path.replace('.', ':').replace(':', '-') + '/')

    Offset = Predictions - Test_label
    OffsetP = Offset / Test_label * 100
    return History, Model, OffsetP, Offset
Example no. 5
def draw_clusterings_kmeans(result, filetrace):
    label = result[0]
    n_cluster = len(set(label))
    X = np.array(result[1])
    # PCA dimensionality reduction
    X = MachineLearning.skl_pca(X, demen=2)
    x_standard = X[0]
    # Normalize the data to [0, 1]
    min_max_scaler = preprocessing.MinMaxScaler()
    # Normalize the training data
    x_standard = min_max_scaler.fit_transform(x_standard)

    cluster = [[] for i in range(n_cluster)]
    for i in range(len(label)):
        for j in range(n_cluster):
            if label[i] == j:
                cluster[j].append(x_standard[i])
    # Save the files
    filenumber = 0
    for i in cluster:
        filenumber = filenumber + 1
        filename = 'KMeans_cluster-cluter' + str(
            filenumber) + r'-Normalization.csv'
        f = filetrace + '\\' + 'File after Processing' + '\\' + filename
        np.savetxt(f, i, fmt='%s', delimiter=',')
    print('KMeans 2D Image File made!')
Example no. 6
def draw_clusterings_svm(result, filetrace, filename_mark):
    label = result[0]
    print(len(label))
    X = np.array(result[1])
    # PCA dimensionality reduction
    X = MachineLearning.skl_pca(X, demen=2)
    x_standard = X[0]
    # Normalize the data to [0, 1]
    min_max_scaler = preprocessing.MinMaxScaler()
    # Normalize the training data
    x_standard = min_max_scaler.fit_transform(x_standard)
    cluster_twin = []
    cluster_kink = []
    for i in range(len(label)):
        if label[i] == 0:
            cluster_twin.append(x_standard[i])
        if label[i] == 1:
            cluster_kink.append(x_standard[i])
    # Save the files
    filename1 = 'SVM_cluster-twin-Normalization'
    f = filetrace + '\\' + 'File after Processing' + '\\' + filename1 + filename_mark + '.csv'
    np.savetxt(f, cluster_twin, fmt='%s', delimiter=',')
    filename2 = 'SVM_cluster-kink-Normalization'
    f = filetrace + '\\' + 'File after Processing' + '\\' + filename2 + filename_mark + '.csv'
    np.savetxt(f, cluster_kink, fmt='%s', delimiter=',')
    print('SVM 2D Image File made!')
Example no. 7
def model_test(file, filename, filetrace, filetrace_file_cluster, smooth):
    f = Screening.read_file(file)
    data_fre_range_fre = Screening.data_fre(
        f, filetrace_file_cluster,
        smooth)  # [[frequency range], [[fre1],[fre2],[fre3]...[fre-n]]]
    print(data_fre_range_fre[1])
    result = MachineLearning.svm_model(data_fre_range_fre[1], filetrace)
    file = filetrace + r'\Model Test' + '\\' + filename + '-Label.csv'
    np.savetxt(file, result[0], fmt='%s', delimiter=',')
    print('Model Test Over!')
    # The test file is twin, so every sample should be labelled 0 (twin):
    n_twin = 0
    n_kink = 0
    print(len(result[0]))
    for i in result[0]:
        if i == 0:
            n_twin = n_twin + 1
        if i == 1:
            n_kink = n_kink + 1
    # print('kink: ',n_kink)
    # print('twin: ', n_twin)
    accuracy = n_twin / (n_twin + n_kink)
    print('Accuracy: ', accuracy)
    txtfile = ['Accuracy: ' + str(accuracy)]
    file = filetrace + r'\Model Test' + '\\' + filename + '-Accuracy.txt'
    np.savetxt(file, txtfile, fmt='%s', delimiter=',')
Example no. 8
def run(filename):
    print "Reading File"
    training,test=getimagelists(filename)
    features=[]
    labels=[]
    
    d={}
    for line in training:
        print "Processing file: " + line.split(tab)[0]
        l,f,d=processtrainingimage.process(line,d)
        for i in range(len(f)):
            features.append(f[i])
        for i in range(len(l)):
            labels.append(l[i])
    o=open('output.txt','w')
    print "outputting"
    for i in xrange(len(labels)):
        output = labels[i]
        for j in xrange(len(features[i])):
            output+=tab+str(features[i][j])
        o.write(output+'\n')
    print "Converting nested list to array"
    features=np.array(features,dtype=float)
    '''
    features, labels= loadFile("output.txt")
    '''
    print "Building Machine Learning Models"
    model = MachineLearning.ml(features,labels)
    print "Starting Testing Images!"
    for line in test:
        print "Processing file: " + line.split(tab)[0]
        ProcessTestImage.runWalk(line,40,model)
Example no. 9
    def reproduction(self, qq0, qqe, tt, T):
        '''
        :param qq0: start point of the plan
        :param qqe: goal point of the plan
        :param tt: sampling instants of the plan
        :return: planned positions at the sampling instants
        '''
        # Convert planning time to the phase variable
        ss = np.exp(-(self.alpha / self.tau) * tt)
        num = len(ss)

        # Compute the forcing term using the RBF network
        f = np.zeros([num, self.m])
        for i in range(num):
            f[i, :] = ml.rbf_oput_nout(ss[i], self.c, self.sigma, self.w)
        self.f = f
        print f.shape

        # Compute the end-effector positions
        XX = np.zeros([num, self.m])

        x_dot = np.zeros(self.m)
        xx = np.copy(qq0)

        # Solve the differential equation iteratively
        for i in range(num):
            for j in range(self.m):
                [xx[j], x_dot[j]] = dmps_solve_2(
                    self.tau, self.k[j], self.d[j],
                    qqe[j], qq0[j], ss[i], T, f[i, j],
                    xx[j], x_dot[j])
                XX[i, j] = xx[j]
        self.xx = np.copy(XX)

        return self.xx
Example no. 10
def kmeans_machine_learn(file, filetrace, filetrace_file_cluster, smooth,
                         cluster):
    print('kmeans_machine_learn is running')
    f = Screening.read_file(file)
    # Extract the frequency-domain vector of every useful signal
    frequency = Screening.data_fre(
        f, filetrace_file_cluster,
        smooth)  # [[frequency range], [[fre1],[fre2],[fre3]...[fre-n]]]
    fre_range = frequency[0]
    fre = frequency[1]
    # K-means learning step
    label_fre = MachineLearning.skl_kmeans(
        fre, cluster=cluster)  # [cluster_label, data_fre]
    result = MachineLearning.kmeans_ave_fre(
        label_fre, fre_range, cluster
    )  # [[frequency range], [[fre1_cluster1],[fre2_cluster2],[fre3_cluster3],[fre4_cluster4], ...]]
    n = len(set(label_fre[0]))
    title = ['fre-range']
    for i in range(n):
        title.append(str(i))
    temp = [result[0]]
    for i in result[1]:
        temp.append(i)
    temp1 = [title]
    temp = np.transpose(temp).tolist()
    for i in temp:
        temp1.append(i)
    # Save the file
    filename = 'Kmeans_averange_frequency.csv'
    f = filetrace + '\\' + 'File after Processing' + '\\' + filename
    np.savetxt(f, temp1, fmt='%s', delimiter=',')
    print('Kmeans_averange_frequency File made')

    # Save the file with the cluster label of each signal
    filename = 'Kmeans_label.csv'
    f = filetrace + '\\' + 'File after Processing' + '\\' + filename
    np.savetxt(f, label_fre[0], fmt='%s', delimiter=',')
    print('Kmeans_label File made')

    # Draw a 2D view of the clustering result
    # result = [label, [[cluster1],[cluster2],[cluster3],...]]
    DrawImage.draw_clusterings_kmeans(label_fre, filetrace)
    return label_fre[0]
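
A hedged usage sketch for kmeans_machine_learn; the paths and the cluster count below are illustrative assumptions only.

if __name__ == '__main__':
    # Hypothetical raw-signal file, its trace directory, and an output root that
    # already contains a 'File after Processing' subfolder
    labels = kmeans_machine_learn(file=r'data\signals.txt',
                                  filetrace=r'C:\results',
                                  filetrace_file_cluster=r'data\signals',
                                  smooth=0,
                                  cluster=4)
    print('cluster labels found:', set(labels))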
Example no. 11
def bilibili_train():
    csv_path = r'../data/bilibili_data.csv'

    data = bilibili_read_data(csv_path)

    data = divide_data(data[0], data[1])

    variables = ['danmu', 'reply', 'favorite', 'coin', 'share', 'like']

    print(len(data.get('train_vecs')))
    print(len(data.get('train_exps')))

    train_vecs = data.get('train_vecs')
    train_exps = data.get('train_exps')

    MachineLearning.normalize_median(train_vecs, train_exps)

    model = MachineLearning.perceptron(variables=variables,
                                       train_vecs=train_vecs,
                                       train_exps=train_exps)

    model.train(train_iter_num=10000, rate=0.01)
Example no. 12
def wine_train():
    csv_path = r'winequality-red.csv'
    input_vecs = []
    input_exps = []
    with open(csv_path, 'r', encoding='utf-8') as file:
        reader = csv.reader(file, delimiter=';')
        for line in reader:
            # print(line)
            temp = [eval(_i) for _i in line]
            line = temp
            input_vecs.append(line[0:-1])
            # print(input_vecs)
            # import os
            # os._exit(-1)
            input_exps.append(line[-1])

    features = [
        'fixed acidity',
        'volatile acidity',
        'citric acid',
        'residual sugar',
        'chlorides',
        'free sulfur dioxide',
        'total sulfur dioxide',
        'density',
        'pH',
        'sulphates',
        'alcohol',
    ]
    variables = [0.0 for _ in features]
    MachineLearning.normalize_median(input_vecs, input_exps)

    model = MachineLearning.perceptron(variables=variables,
                                       train_vecs=input_vecs,
                                       train_exps=input_exps)

    model.train(train_iter_num=10000, rate=0.0001)
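
For reference, a sketch of a stricter parse of the same semicolon-delimited file (assuming a header row, as in the UCI wine-quality data) that casts each field explicitly instead of calling eval:

import csv

def load_wine(csv_path):
    # Returns (feature vectors, quality labels) from a ';'-separated CSV
    input_vecs, input_exps = [], []
    with open(csv_path, 'r', encoding='utf-8') as file:
        reader = csv.reader(file, delimiter=';')
        next(reader, None)  # drop the assumed header row
        for line in reader:
            values = [float(v) for v in line]
            input_vecs.append(values[:-1])
            input_exps.append(values[-1])
    return input_vecs, input_exps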
Example no. 13
def svm_machine_learn_model(file_cluster,
                            filetrace_cluster,
                            filetrace,
                            draw=0,
                            smooth=0):
    # Used when a trained model is available to load
    # Read the data file that needs to be classified
    print('svm_machine_learn with model is running')
    f = Screening.read_file(file_cluster)
    # Extract the frequency-domain vector of every useful signal
    result = Screening.data_fre(f, filetrace_cluster, smooth)
    fre_range = result[0]
    fre = result[1]
    label_fre = MachineLearning.svm_model(
        fre, filetrace)  # [cluster_label, data_fre]
    result = MachineLearning.ave_fre(
        label_fre, fre_range)  # [[frequency range], [[fre1_twin],[fre2_kink]]]
    temp = [['fre-range', 'twin', 'kink']]
    array = np.array([result[0], result[1][0], result[1][1]])
    array = array.T  # column 1: fre-range, column 2: twin, column 3: kink
    arr = array.tolist()
    for i in arr:
        temp.append(i)
    # Save the file
    filename = 'SVM_averange_frequency.csv'
    # filetrace = r'C:\Users\liuhanqing\Desktop\test\AE'
    f = filetrace + '\\' + filename
    np.savetxt(f, temp, fmt='%s', delimiter=',')
    print('SVM_averange_frequency File made')

    # Save the file with the cluster label of each signal
    filename = 'SVM_label.csv'
    # filetrace = r'C:\Users\liuhanqing\Desktop\test\AE'
    f = filetrace + '\\' + filename
    np.savetxt(f, label_fre[0], fmt='%s', delimiter=',')
    print('SVM_label.csv File made')
    return label_fre[0]
Example no. 14
def CVTest(test='layers',
           start=4,
           finish=14,
           optimizer=keras.optimizers.Adam(lr=0.001,
                                           beta_1=0.95,
                                           beta_2=0.999,
                                           epsilon=1e-4)):

    val_labels = Data_NoNAN.loc[:, 'Diesel':'Total']
    val_data = fcn.StandardizeData(Data_NoNAN.loc[:, 'Year':'Gust']).fillna(0)

    listOfErrors = ML.CrossValidation(data=val_data,
                                      labels=val_labels,
                                      test=test,
                                      start=start,
                                      finish=finish,
                                      optimizer=optimizer)
    return listOfErrors
Example no. 15
    def learn(self):
        # Compute the forcing term of the DMP model
        f_demo = np.zeros([self.num, self.n])
        for i in range(self.n):
            f_demo[:, i] = (self.tau * self.tau * self.qq_qva[:, i, 2] + self.d[i] * self.qq_qva[:, i, 1]) \
                           / self.k[i] - (self.qq_qva[-1, i, 0] * np.ones(self.num) - self.qq_qva[:, i, 0]) \
                           - (self.qq_qva[-1, i, 0] - self.qq_qva[0, i, 0]) * self.ss
        self.f_demo = f_demo

        # Fit the forcing term with RBFs and obtain the RBF parameters
        # Centers: h*m of them; the time variable gives m = 1
        c = np.linspace(self.ss[0], self.ss[-1], self.h)
        # Variance
        sigma = abs(self.ss[0] - self.ss[-1])
        # RBF hidden-to-output weights, n*h
        w = ml.rbf_weight_oput_nout(self.ss, c, sigma, f_demo)

        # Store the RBF parameters
        rbf_param = np.zeros([self.n + 2, self.h])
        rbf_param[0, 0] = sigma  # first entry: the variance sigma
        rbf_param[1, :] = c  # second row: the centers
        rbf_param[2:self.n + 2, :] = w  # rows three onward: the weights
        self.rbf_param = rbf_param
Example no. 16
def runExperiment(df, feature_sets, times, batch_size):
    tick = time.time()
    outputs = []
    for feature_set in feature_sets:
        classifiers = dict(
            online=onlineML.getOnlineClassifiers(),
            offline=offlineML.getOfflineClassifiers(df.shape[1])
        )
        y, X = ml.getDataForML(df, features=feature_set, feature_predict=feature_predict, sampling=False)
        n_features = X.shape[1]
        print 'total features ', n_features
        print 'total samples ', X.shape[0]
        times['preparing_time'] += time.time() - tick

        cls_stats = offlineML.runOfflineML(y, X, classifiers['online'])
        output = [cls_stats, classifiers['online'], feature_set, '5fold']
        outputs.append(output)

        # online
        cls_stats = onlineML.runOnlineML(y, X, classifiers['online'], batch_size=batch_size)
        output = [cls_stats, classifiers['online'], feature_set, str(batch_size) + 'batch']
        outputs.append(output)
        #cls_name, cls in classifiers.items():

        cls_stats = offlineML.runOfflineML(y, X, classifiers['offline'])
        output = [cls_stats, classifiers['offline'], feature_set, '5fold']
        outputs.append(output)

        cls_stats = onlineML.runOnlineML(y, X, classifiers['offline'], batch_size=batch_size)
        output = [cls_stats, classifiers['offline'], feature_set, str(batch_size) + 'batch']
        outputs.append(output)
        #saveClassificationResults(DIR + 'results/accuracy_ML.csv', output)
        #theanoTest(y,X)
        pt.plotEverything(cls_stats, times, len(X))

    return outputs
Example no. 17
def main():
    reduced_filename = KNMI.PATH[:KNMI.PATH.rindex('.')] + ".csv"
    df = pd.read_csv(reduced_filename)
    trn, dev, tst = ml.Lq_Fit.seperate_trn_dev_tst(df)

    final_filename = KNMI.PATH[:KNMI.PATH.rindex('.')] + "_final.csv"
    df_final = pd.read_csv(final_filename)

    att = input("Which attribute do you want to analyse? ")
    att = att.upper()

    print("You asked for:", KNMI.attributes[att])

    uni.att_values(df, att)
    uni.boxplot_att(df, att, save=False)
    uni.histogram_att(df, att, save=False)

    plot_att_year(df, [], att)
    plot_att_year_bok(df, [], att)
    plot_att_month(df, [], att)

    for other_att in MEAN_ATTS:
        print("\nFinding correlation of", KNMI.attributes[att], "with",
              KNMI.attributes[other_att])
        print("Correlation is", df_final[att].corr(df[other_att]))

        plot_att_conditional(df, [], att, other_att)

        choice = ""
        while choice != "y" and choice != "n" and choice != "s":
            choice = input("Do you want regression over these two " +
                           "attributes?\nyes (y), no (n), " +
                           "yes with switched axis (s): ")
            choice = choice.lower()

        if choice == "y":
            poly = ml.try_poly_fit(trn, dev, att, other_att)
            ml.plot_poly(tst, poly, att, other_att)

        elif choice == "s":
            poly = ml.try_poly_fit(trn, dev, other_att, att)
            ml.plot_poly(tst, poly, other_att, att)
Example no. 18
def machine_learning():
	algo_list = ["knn", "svm", "gbc", "rfc", "nn"]

	# Check argument
	if request.args.get('images_directory') is None :
		return 'No "images_directory" given.'

	if request.args.get('algorithm') is None :
		return 'No "algorithm" given.'

	if request.args.get('save_directory') is None :
		return 'No "save_directory" given.'


	images_directory = request.args.get('images_directory')
	algorithm = str(request.args.get('algorithm'))
	save_directory = request.args.get('save_directory')

	# creates new MachineLearning object
	if algorithm == "nn":
		ml = MachineLearning(images_directory, save_directory, 32)
	else:
		ml = MachineLearning(images_directory, save_directory)
	
	# error detection
	if len(ml.imgs) == 0 or len(ml.labels) == 0:
		app.logger.error("No images were read!")
		return "Error: No images were read!"

	if algorithm in algo_list:
		score_train, score_test = ml.train(algorithm, ml.imgs, ml.labels)
	else:
		app.logger.warning("Unexpected algorithm choice, choosing default!")
		algorithm = "svm"
		score_train, score_test = ml.train(algorithm, ml.imgs, ml.labels)

	return '\"' + algorithm + '\":{\"train_acc\":'+str(score_train)+' ,\"val_acc\":'+str(score_test)+'}'
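
A hedged example of calling this endpoint; the host and route below are placeholders, and only the three query parameters and the algorithm names come from the handler above.

import requests

resp = requests.get('http://localhost:5000/machine_learning',      # hypothetical host/route
                    params={'images_directory': '/data/images',    # hypothetical paths
                            'save_directory': '/data/models',
                            'algorithm': 'svm'})                    # one of knn, svm, gbc, rfc, nn
print(resp.text)  # '"<algorithm>":{"train_acc":... ,"val_acc":...}'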
Example no. 19
    def __init__(self, network, config, reward_engine):
        super().__init__(network, config)
        self.epoch_counter = self.counters[config['AgentEpochCounter']]
        self.iter_counter = self.counters[config['AgentIterationCounter']]
        # Placeholder
        self.temp = tf.placeholder(shape=[1], dtype=tf.float32)
        self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
        self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
        # Algorithm
        self.output = tf.reshape(self.output_layer, [-1])
        self.prob_dist = tf.nn.softmax(self.output / self.temp)
        self.weight = tf.slice(self.output, self.action_holder, [1])
        self.loss = -(tf.math.log(self.weight) * self.reward_holder)
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.config['AgentLearningRate'])
        self.update = self.optimizer.minimize(self.loss)

        # Processor
        self.exploration = ML.Exploration(self)
        self.exp_buffer = ML.ExperienceBuffer(self)
        self.state_space = ML.StateSpace(self)
        self.action_space = ML.ActionSpace(self)
        self.reward_engine = ML.RewardEngine(self, reward_engine)
        self.recorder = ML.Recorder(self)
Example no. 20
mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5,3), random_state=1) #creating mlp object
majority = VotingClassifier(estimators=[("svm",svm1),("rf",rf),("mlp",mlp)], voting = "hard") #creating majority vote object


#Cross validation iterators
skf = StratifiedKFold(n_splits=5)
loo = LeaveOneOut()

#outlier detectors
angleBased = abod.ABOD(method="fast")
isolationForrest = iforest.IForest(n_estimators=10, behaviour="new")
kNearestNeighbors = knn.KNN(method="median",n_neighbors=5)
detector = kNearestNeighbors

#datasets
cardboard = ml.parseCsv("C:\\Users\\Luke\\Documents\\GitHub\\UREP_Cancer_Detection_Array_Microwave_Sensor\\results\\Deltas\\DDeltas.csv")
wood = ml.parseCsv("C:\\Users\\Luke\\Documents\\GitHub\\UREP_Cancer_Detection_Array_Microwave_Sensor\\results\\Deltas\\EDeltas.csv")
plastic = ml.parseCsv("C:\\Users\\Luke\\Documents\\GitHub\\UREP_Cancer_Detection_Array_Microwave_Sensor\\results\\Deltas\\BDeltas.csv")
plastic = plastic+ml.parseCsv("C:\\Users\\Luke\\Documents\\GitHub\\UREP_Cancer_Detection_Array_Microwave_Sensor\\results\\Deltas\\FDeltas.csv")


#standard expected values
expected = [[0 for x in range(50)],[1 for x in range(50)]]

#removing outliers
cardboard = ml.removeOutliers(cardboard, detector, True)
wood = ml.removeOutliers(wood, detector, True)
plastic = ml.removeOutliers(plastic, detector, True)

#creating expected values after outlier removal
cardboardEV = [[0 for i in range(len(cardboard))],[1 for i in range(len(cardboard))]]
Example no. 21
        # cv2.imshow("origin", img)
        # cv2.imshow("origin",img)
        # Pass the trackbar parameters into the function
        BLCSZ = 2 * cv2.getTrackbarPos('BLOCKSIZE', 'image') + 3
        Csize = cv2.getTrackbarPos('C', 'image')
        #img1 = iPP.BeBinary(target)
        if k == -1:
            #img = cv2.imread('D:/new.png', 0)

            #img = cv2.imread('D:/PProject/pic.png', 0)
            # e1 = cv2.getTickCount()
            ret, bkp = iPP.PreProcess(img, 33, 20, 1)
            cv2.imshow("origin", bkp)
            # wxbmp = wx.BitmapFromBuffer(720, 1280, bkp)
            # e2 = cv2.getTickCount()
            # t = (e2 - e1) / cv2.getTickFrequency()
            # print(t)
        if ret:
            result = ML.ocr()
            resultnow = ''
            resultnow += str(int(result[0][0]))
            resultnow += str(int(result[1][0]))
            resultnow += str(int(result[2][0]))
            if (resultnow == resultlast):
                cv2.putText(bkp, resultnow, org, fontFace, fontScale,
                            fontcolor, thickness, lineType)
                content_text.SetValue(resultnow)
                frame0.Show()

    resultlast = resultnow
Example no. 22
print("NaN's in data set after standardization: " +
      str(Data_Stand.isnull().sum().sum()))

Data_NoNAN = fcn.CreateCapData(Data_NoNAN)

if not os.path.isfile(PC.GraphPath + 'ScatterPlot_Norm.png'):
    print('Normalized scatterplot not existing, creating it')
    grph.ScatterMatrix(PC.GraphPath, Data_Stand, 'ScatterPlot_Norm')

Data_NoNAN.describe()

#if (not os.path.isfile(PC.DataPath+'Trainlab.pkl') or
#    not os.path.isfile(PC.DataPath+'Traindat.pkl') or
#    not os.path.isfile(PC.DataPath+'Testdat.pkl') or
#    not os.path.isfile(PC.DataPath+'Testdat.pkl')):
Train_label, Train_data, Test_label, Test_data = ML.Split(Data_NoNAN)
#    Train_label.to_pickle(PC.DataPath+'Trainlab.pkl')
#    Train_data.to_pickle(PC.DataPath+'Traindat.pkl')
#    Test_label.to_pickle(PC.DataPath+'Testlab.pkl')
#    Test_data.to_pickle(PC.DataPath+'Testdat.pkl')
#else:
#    Train_label = pd.read_pickle(PC.DataPath+'Trainlab.pkl')
#    Train_data = pd.read_pickle(PC.DataPath+'Traindat.pkl')
#    Test_label = pd.read_pickle(PC.DataPath+'Testlab.pkl')
#    Test_data = pd.read_pickle(PC.DataPath+'Testdat.pkl')

# Standardize the data based on mean and std dev of train data
# Standardize test data first, so that it doesn't standardize
# on already standardized data

Test_data = fcn.StandardizeData(Test_data, std_Data=Train_data).fillna(0)
Example no. 23
def main():
    # Set seed
    np.random.seed(0)

    # Create the data frames from files
    all_patients = pd.read_csv("data/all_pats.csv")
    all_visits = pd.read_csv("data/all_visits.csv")
    all_updrs = pd.read_csv("data/all_updrs.csv")
    all_updrs_subcomponents = pd.read_csv("data/itemizedDistributionOfUPDRSMeaning_Use.csv")

    # Enrolled PD / Control patients
    pd_control_patients = all_patients.loc[
        ((all_patients["DIAGNOSIS"] == "PD") | (all_patients["DIAGNOSIS"] == "Control")) & (
            all_patients["ENROLL_STATUS"] == "Enrolled"), "PATNO"].unique()

    # Data for these patients
    pd_control_data = all_visits[all_visits["PATNO"].isin(pd_control_patients)]

    # Merge with UPDRS scores
    pd_control_data = pd_control_data.merge(all_updrs[["PATNO", "EVENT_ID", "TOTAL"]], on=["PATNO", "EVENT_ID"],
                                            how="left")

    # Get rid of nulls for UPDRS
    pd_control_data = pd_control_data[pd_control_data["TOTAL"].notnull()]

    # Merge with patient info
    pd_control_data = pd_control_data.merge(all_patients, on="PATNO", how="left")

    # TODO: Merge patient's SC features onto baseline if times are close
    # Only include baseline and subsequent visits
    pd_control_data = pd_control_data[
        (pd_control_data["EVENT_ID"] != "ST") & (
            pd_control_data["EVENT_ID"] != "U01") & (pd_control_data["EVENT_ID"] != "PW") & (
            pd_control_data["EVENT_ID"] != "SC")]

    # Encode to numeric
    mL.clean_data(data=pd_control_data, encode_auto=["GENDER.x", "DIAGNOSIS", "HANDED"], encode_man={
        "EVENT_ID": {"BL": 0, "V01": 1, "V02": 2, "V03": 3, "V04": 4, "V05": 5, "V06": 6, "V07": 7, "V08": 8,
                     "V09": 9, "V10": 10, "V11": 11, "V12": 12}})

    # TODO: Optimize flexibility with NAs
    # Eliminate features with more than 20% NAs
    for feature in pd_control_data.keys():
        if len(pd_control_data.loc[pd_control_data[feature].isnull(), feature]) / len(
                pd_control_data[feature]) > 0.2:
            pd_control_data = pd_control_data.drop(feature, 1)

    # TODO: Rethink this
    # Eliminate features with more than 30% NA at Baseline
    for feature in pd_control_data.keys():
        if len(pd_control_data.loc[
                           (pd_control_data["EVENT_ID"] == 0) & (pd_control_data[feature].isnull()), feature]) / len(
            pd_control_data[pd_control_data["EVENT_ID"] == 0]) > 0.3:
            pd_control_data = pd_control_data.drop(feature, 1)

    # TODO: Imputation
    # Drop rows with NAs
    pd_control_data = pd_control_data.dropna()

    # Drop duplicates (keep first, delete others)
    pd_control_data = pd_control_data.drop_duplicates(subset=["PATNO", "EVENT_ID"])

    # Drop patients without BL data
    for patient in pd_control_data["PATNO"].unique():
        if patient not in pd_control_data.loc[pd_control_data["EVENT_ID"] == 0, "PATNO"].unique():
            pd_control_data = pd_control_data[pd_control_data["PATNO"] != patient]

    # Select all features in the data set
    all_data_features = list(pd_control_data.columns.values)

    for updrs_subscomponent in all_updrs_subcomponents["colname"].tolist():
        print(updrs_subscomponent)
        for i in range(0, 4):
            if all_updrs_subcomponents.loc[
                        all_updrs_subcomponents["colname"] == updrs_subscomponent, "use{}".format(i)].min() == 1:
                # Generate features (and update all features list)
                train = generate_features(data=pd_control_data, features=all_data_features, file="data/PPMI_train.csv",
                                          action=True, updrs_subsets=True, time=True, future=False, milestones=True,
                                          slopes=False, score_name=updrs_subscomponent,
                                          milestone_feature=updrs_subscomponent, milestone_value=i)

                # Initialize predictors as all features
                predictors = list(train.columns.values)

                # Initialize which features to drop from predictors
                drop_predictors = ["PATNO", "EVENT_ID", "INFODT", "INFODT.x", "ORIG_ENTRY", "LAST_UPDATE", "PAG_UPDRS3",
                                   "PRIMDIAG",
                                   "COMPLT", "INITMDDT", "INITMDVS", "RECRUITMENT_CAT", "IMAGING_CAT", "ENROLL_DATE",
                                   "ENROLL_CAT",
                                   "ENROLL_STATUS", "BIRTHDT.x", "GENDER.y", "APPRDX", "GENDER", "CNO", "TIME_FUTURE",
                                   "TIME_NOW",
                                   "SCORE_FUTURE", "SCORE_SLOPE", "TIME_OF_MILESTONE", "TIME_UNTIL_MILESTONE",
                                   "BIRTHDT.y",
                                   "TIME_SINCE_DIAGNOSIS", "TIME_SINCE_FIRST_SYMPTOM", "TIME_FROM_BL"]

                # List of UPDRS components
                updrs_components = ["NP1COG", "NP1HALL", "NP1DPRS", "NP1ANXS", "NP1APAT", "NP1DDS", "NP1SLPN",
                                    "NP1SLPD",
                                    "NP1PAIN",
                                    "NP1URIN", "NP1CNST", "NP1LTHD", "NP1FATG", "NP2SPCH", "NP2SALV", "NP2SWAL",
                                    "NP2EAT",
                                    "NP2DRES", "NP2HYGN", "NP2HWRT", "NP2HOBB", "NP2TURN", "NP2TRMR", "NP2RISE",
                                    "NP2WALK",
                                    "NP2FREZ", "PAG_UPDRS3", "NP3SPCH", "NP3FACXP", "NP3RIGN", "NP3RIGRU", "NP3RIGLU",
                                    "PN3RIGRL",
                                    "NP3RIGLL", "NP3FTAPR", "NP3FTAPL", "NP3HMOVR", "NP3HMOVL", "NP3PRSPR", "NP3PRSPL",
                                    "NP3TTAPR",
                                    "NP3TTAPL", "NP3LGAGR", "NP3LGAGL", "NP3RISNG", "NP3GAIT", "NP3FRZGT", "NP3PSTBL",
                                    "NP3POSTR",
                                    "NP3BRADY", "NP3PTRMR", "NP3PTRML", "NP3KTRMR", "NP3KTRML", "NP3RTARU", "NP3RTALU",
                                    "NP3RTARL",
                                    "NP3RTALL", "NP3RTALJ", "NP3RTCON"]

                # Drop UPDRS components
                # drop_predictors.extend(updrs_components)

                # Drop unwanted features from predictors list
                for feature in drop_predictors:
                    if feature in predictors:
                        predictors.remove(feature)

                # Target for the model
                target = "TIME_UNTIL_MILESTONE"

                # Algs for model
                # Grid search (futures): n_estimators=50, min_samples_split=75, min_samples_leaf=50
                # Futures: n_estimators=150, min_samples_split=100, min_samples_leaf=25
                # Grid search (slopes): 'min_samples_split': 75, 'n_estimators': 50, 'min_samples_leaf': 25
                algs = [
                    RandomForestRegressor(n_estimators=150, min_samples_split=100, min_samples_leaf=25, oob_score=True),
                    LogisticRegression(),
                    SVC(probability=True),
                    GaussianNB(),
                    MultinomialNB(),
                    BernoulliNB(),
                    KNeighborsClassifier(n_neighbors=25),
                    GradientBoostingClassifier(n_estimators=10, max_depth=3)]

                # Alg names for model
                alg_names = ["Random Forest",
                             "Logistic Regression",
                             "SVM",
                             "Gaussian Naive Bayes",
                             "Multinomial Naive Bayes",
                             "Bernoulli Naive Bayes",
                             "kNN",
                             "Gradient Boosting"]

                # TODO: Configure ensemble
                # Ensemble
                ens = mL.ensemble(algs=algs, alg_names=alg_names,
                                  ensemble_name="Weighted ensemble of RF, LR, SVM, GNB, KNN, and GB",
                                  in_ensemble=[True, True, True, True, False, False, True, True],
                                  weights=[3, 2, 1, 3, 1, 3],
                                  voting="soft")

                # Add ensemble to algs and alg_names
                # algs.append(ens["alg"])
                # alg_names.append(ens["name"])

                # Parameters for grid search
                grid_search_params = [{"n_estimators": [50, 150, 300, 500, 750, 1000],
                                       "min_samples_split": [4, 8, 25, 50, 75, 100],
                                       "min_samples_leaf": [2, 8, 15, 25, 50, 75, 100]}]

                # Display ensemble metrics
                metrics1 = mL.metrics(data=train, predictors=predictors, target=target, algs=algs, alg_names=alg_names,
                                     cross_val=[True], scoring="r2")

                all_updrs_subcomponents.loc[
                    all_updrs_subcomponents["colname"] == updrs_subscomponent, "over{}_r2".format(i)] = \
                    metrics1["Cross Validation r2"]

                # Display ensemble metrics
                metrics2 = mL.metrics(data=train, predictors=predictors, target=target, algs=algs, alg_names=alg_names,
                                     cross_val=[True], scoring="root_mean_squared_error")

                all_updrs_subcomponents.loc[
                    all_updrs_subcomponents["colname"] == updrs_subscomponent, "over{}_rmse".format(i)] = \
                    metrics2["Cross Validation root_mean_squared_error"]

    all_updrs_subcomponents.to_csv("data/updrs_subcomponents_scores.csv")
Example no. 24
def model_origin_data(file_twin, file_kink, filetrace_twin, filetrace_kink,
                      filetrace, filename_mark, smooth):
    # Read the twin data file
    # Used when there is no trained model to load
    print('svm_machine_learn with no model is running')
    f_twin = Screening.read_file(file_twin)
    # Extract the frequency-domain vector of every useful signal
    result1 = Screening.data_fre(f_twin, filetrace_twin, smooth)
    fre_range = result1[0]
    fre_twin = result1[1]

    # Read the kink data file
    f_kink = Screening.read_file(file_kink)
    result2 = Screening.data_fre(
        f_kink, filetrace_kink,
        smooth)  # [[frequency range], [[fre1],[fre2],[fre3]...[fre-n]]]
    fre_kink = result2[1]

    # Merge the data and build the true labels
    label_true = [0 for i in range(len(fre_twin))]
    for i in range(len(fre_kink)):
        label_true.append(1)
    fre = fre_twin
    for i in fre_kink:
        fre.append(i)

    # Test the accuracy of the model
    model_path = filetrace + '\\' + r'Model\train0_model.m'
    model_svm = joblib.load(model_path)
    label_cluster = model_svm.predict(fre)
    accuracy = accuracy_score(label_true, label_cluster)
    print('accuracy: ', accuracy)
    print('y_test: ', label_true)
    print('y_predicted: ', list(label_cluster))

    # Generate the two cluster files
    cluster_twin = []
    cluster_kink = []
    for i in range(len(label_cluster)):
        if label_cluster[i] == 0:
            cluster_twin.append(fre[i])
        if label_cluster[i] == 1:
            cluster_kink.append(fre[i])
    filename = 'SVM_cluster-twin'
    f = filetrace + '\\' + 'File after Processing' + '\\' + filename + filename_mark + '.csv'
    np.savetxt(f, cluster_twin, fmt='%s', delimiter=',')
    filename = 'SVM_cluster-kink'
    f = filetrace + '\\' + 'File after Processing' + '\\' + filename + filename_mark + '.csv'
    np.savetxt(f, cluster_kink, fmt='%s', delimiter=',')
    print('Twin-kink File made')

    # Generate the average-frequency result
    label_fre = [label_cluster, fre]  # [cluster_label, data_fre]
    result = MachineLearning.ave_fre(
        label_fre, fre_range)  # [[frequency range], [[fre1_twin],[fre2_kink]]]
    temp = [['fre-range', 'twin', 'kink']]
    array = np.array([result[0], result[1][0], result[1][1]])
    array = array.T  # column 1: fre-range, column 2: twin, column 3: kink
    arr = array.tolist()
    for i in arr:
        temp.append(i)
    # Save the file
    filename = 'SVM_averange_frequency'
    f = filetrace + '\\' + 'File after Processing' + '\\' + filename + filename_mark + '.csv'
    np.savetxt(f, temp, fmt='%s', delimiter=',')
    print('SVM_averange_frequency-origin_data File made')

    # Draw the 2D visualization
    result1 = [label_cluster, fre]
    DrawImage.draw_clusterings_svm(result1, filetrace, filename_mark)
Example no. 25
def PlayGame(opponent):
    global board
    board = Functions.BoardInit()
    player = 1
    UpdateBoard(player)
    if (opponent == "Human"):

        player = randint(1, 2)
        movesAvailable = 1
        gamePlaying = 1
        while gamePlaying != 0:

            if (Functions.GameOver(board) == 1):
                gamePlaying = 0
                score = Functions.BlackWhiteCount(board)
                if (score > 0):
                    winner = 1
                elif (score < 0):
                    winner = 2
                else:
                    winner = 0
                if (winner != 0):
                    print("Game Over! The winner is player ", winner)
                else:
                    print("It's a tie!")

                board = Functions.BoardInit()

            cont.set(0)
            window.wait_variable(cont)
            player = Functions.nextPlayer(board, player)

            time.sleep(0.25)
    if (opponent == "Rand"):
        while Functions.GameOver(board) == 0:

            player = randint(1, 2)
            movesAvailable = 1
            gamePlaying = 1
            while gamePlaying != 0:
                UpdateBoard(player)

                if (Functions.GameOver(board) == 1):
                    gamePlaying = 0
                    score = Functions.BlackWhiteCount(board)
                    if (score > 0):
                        winner = 1
                    elif (score < 0):
                        winner = 2
                    else:
                        winner = 0
                    if (winner != 0):
                        print("Game Over! The winner is player ", winner)
                    else:
                        print("It's a tie!")

                    board = Functions.BoardInit()
                movesAvailable = Functions.MovesAvailable(board, player)

                cont.set(0)
                if (player == 1):
                    window.wait_variable(cont)
                player = Functions.nextPlayer(board, player)

                time.sleep(0.25)
                while (player == 2):
                    movesAvailable = Functions.MovesAvailable(board, player)
                    print("movesAvailable", movesAvailable)
                    if (movesAvailable):
                        numPicked = randint(0, len(movesAvailable) - 1)
                        print("numPicked", numPicked)
                        movePicked = movesAvailable[numPicked]
                        print("movePicked from movesAvailable", movePicked[0],
                              movePicked[1])
                        x = int(movePicked[1])
                        y = int(movePicked[0])
                        board = Functions.MakeMove(board, y, x, player)
                    player = Functions.nextPlayer(board, player)

    elif (opponent == "MinMax"):
        while Functions.GameOver(board) == 0:

            player = randint(1, 2)
            UpdateBoard(player)
            movesAvailable = 1
            gamePlaying = 1
            while gamePlaying != 0:
                UpdateBoard(player)

                if (Functions.GameOver(board) == 1):
                    gamePlaying = 0
                    score = Functions.BlackWhiteCount(board)
                    if (score > 0):
                        winner = 1
                    elif (score < 0):
                        winner = 2
                    else:
                        winner = 0
                    if (winner != 0):
                        print("Game Over! The winner is player ", winner)
                    else:
                        print("It's a tie!")

                    board = Functions.BoardInit()
                movesAvailable = Functions.MovesAvailable(board, player)

                cont.set(0)
                if (player == 1):
                    window.wait_variable(cont)
                player = Functions.nextPlayer(board, player)

                time.sleep(0.25)
                while (player == 2):
                    nextX = 0
                    nextY = 0
                    movesAvailable = Functions.MovesAvailable(
                        board, player)  # all available moves
                    bestPred = 0
                    j = 0
                    while (j <= len(movesAvailable) - 1 and
                           movesAvailable):  # for each move in movesAvailable
                        moveTest = movesAvailable[j]
                        x = int(moveTest[1])
                        y = int(moveTest[0])
                        testBoard = Functions.BoardCopy(board)
                        testBoard = Functions.MakeMove(testBoard, y, x, player)

                        pred = np.sum(Functions.boardToNN(
                            testBoard,
                            player))  # get the output of an available move

                        #finds the move with the lowest output (best for player 2); an output of 1 means player 1 is guaranteed to win
                        if (j == 0):
                            bestPred = pred
                            nextX = x
                            nextY = y
                        else:
                            if (pred < bestPred):
                                bestPred = pred
                                nextX = x
                                nextY = y
                        j += 1
                    board = Functions.MakeMove(board, nextY, nextX, player)
                    player = Functions.nextPlayer(board, player)
    elif (opponent == "Network"):
        ops.reset_default_graph()
        numInp = 65  # the number of inputs for the neural network
        numLabel = 1  # the number of inputs for the labels

        # Create Placeholders for input and label
        inp, label = ml.placeholders(numInp, numLabel)

        # Initialise parameters
        parameters = ml.initialiseParameters()

        #make the neural network
        out = ml.network(inp, parameters)

        modelPath = "./save/NetworkPlayer"
        # Initialise all the variables
        init = tf.global_variables_initializer()

        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(init)
            #loads the network in or initialises a new network
            try:
                saver.restore(sess, modelPath)
                #print("Model restored from file: %s" % modelPath)
            except:
                #sessctd = False
                print("Initialising")
            while Functions.GameOver(board) == 0:

                board = Functions.BoardInit()
                player = randint(1, 2)
                UpdateBoard(player)
                movesAvailable = 1
                gamePlaying = 1
                while gamePlaying != 0:
                    UpdateBoard(player)

                    if (Functions.GameOver(board) == 1):
                        gamePlaying = 0
                        score = Functions.BlackWhiteCount(board)
                        if (score > 0):
                            winner = 1
                        elif (score < 0):
                            winner = 2
                        else:
                            winner = 0
                        if (winner != 0):
                            print("Game Over! The winner is player ", winner)
                        else:
                            print("It's a tie!")

                        board = Functions.BoardInit()
                    movesAvailable = Functions.MovesAvailable(board, player)

                    cont.set(0)
                    if (player == 1):
                        window.wait_variable(cont)
                    player = Functions.nextPlayer(board, player)

                    time.sleep(0.25)
                    while (player == 2):
                        nextX = 0
                        nextY = 0
                        movesAvailable = Functions.MovesAvailable(
                            board, player)  # all available moves
                        bestPred = 0
                        j = 0
                        while (j <= len(movesAvailable) - 1 and movesAvailable
                               ):  # for each move in movesAvailable
                            moveTest = movesAvailable[j]
                            x = int(moveTest[1])
                            y = int(moveTest[0])
                            testBoard = Functions.BoardCopy(board)
                            testBoard = Functions.MakeMove(
                                testBoard, y, x, player)

                            pred = out.eval(feed_dict={
                                inp:
                                Functions.boardToNN(testBoard, player)
                            })  # get the output of an available move

                            #finds the move with the lowest output (best for player 2); an output of 1 means player 1 is guaranteed to win
                            if (j == 0):
                                bestPred = pred
                                nextX = x
                                nextY = y
                            else:
                                if (pred < bestPred):
                                    bestPred = pred
                                    nextX = x
                                    nextY = y
                            j += 1
                        board = Functions.MakeMove(board, nextY, nextX, player)
                        player = Functions.nextPlayer(board, player)
Example no. 26
def training(opponent, batches):
    import Functions
    import MachineLearning as ml
    import tensorflow as tf
    import numpy as np
    import time
    from random import random, randint
    #    from tensorflow import ops

    tf.reset_default_graph()
    lr = 0.0001  # the learning rate
    numInp = 65  # the number of inputs for the neural network
    numLabel = 1  # the number of inputs for the labels
    batchSize = 50  # the size of the batches
    discountRate = 0.99  # the discount rate for temporal difference learning

    # Create Placeholders for input and label
    inp, label = ml.placeholders(numInp, numLabel)

    # Initialise parameters
    parameters = ml.initialiseParameters()

    #make the neural network
    out = ml.network(inp, parameters)

    #cost function for use in training
    cost = ml.computeCost(out, label)

    #training function
    optimiser = tf.train.AdamOptimizer(learning_rate=lr).minimize(cost)

    player1 = "Network"
    player2 = opponent
    # Initialise all the variables
    init = tf.global_variables_initializer()

    progstart = time.time()
    saver = tf.train.Saver()
    expRate = 0.2
    p1wins = 0  # number of games player 1 has won overall
    p2wins = 0  # number of games player 2 has won overall
    modelPath = "./save/NetworkPlayer"  #WLTD1step #testNetScore or testWinLoss
    i = 1
    while i <= (batches):
        s = 1
        winLoss = np.ndarray((1, 1))
        board = Functions.BoardInit()
        boardArray = Functions.boardToNN(board, 1)
        batchp1wins = 0
        batchp2wins = 0
        labelArray = np.ndarray((1, 1))
        labelArray[0][0] = 0
        with tf.Session() as sess:
            sess.run(init)
            #loads the network in or initialises a new network
            try:
                saver.restore(sess, modelPath)
            except:
                print("Initialising")
            start = time.time()
            while s <= batchSize:
                movesAvailable = 1
                gamePlaying = 1
                player = randint(
                    1, 2
                )  # returns either a 1 or a 2 which determines the starting player
                board = Functions.BoardInit()
                nnBoard = Functions.boardToNN(board, player)
                boardArray = np.concatenate((boardArray, nnBoard), axis=1)
                gameLabelArray = np.ndarray((1, 1))
                while gamePlaying != 0:

                    while ((player1 == "Network" and player == 1)
                           and Functions.GameOver(board)
                           == 0):  # if player 1 is a network
                        nextX = 0
                        nextY = 0
                        movesAvailable = Functions.MovesAvailable(
                            board, player)  # all available moves
                        bestPred = 0
                        j = 0
                        if (
                                random() > expRate
                        ):  # most of the time the network will play the move with the highest output
                            while (j <= len(movesAvailable) - 1
                                   and movesAvailable
                                   ):  # for each move in movesAvailable
                                moveTest = movesAvailable[j]
                                x = int(moveTest[1])
                                y = int(moveTest[0])
                                testBoard = Functions.BoardCopy(board)
                                testBoard = Functions.MakeMove(
                                    testBoard, y, x, player)

                                pred = out.eval(feed_dict={
                                    inp:
                                    Functions.boardToNN(testBoard, player)
                                })  # get the output of an available move

                                #finds the move with the highest output, output of 1 means player 1 is guaranteed to win
                                if (j == 0):
                                    bestPred = pred
                                    nextX = x
                                    nextY = y
                                else:
                                    if (pred > bestPred):
                                        bestPred = pred
                                        nextX = x
                                        nextY = y
                                j += 1
                        elif (
                                movesAvailable
                        ):  # sometimes the network will play a move completely randomly to better explore all possible moves
                            move = movesAvailable[randint(
                                0,
                                len(movesAvailable) - 1)]
                            nextX = int(move[1])
                            nextY = int(move[0])
                            testBoard = Functions.BoardCopy(board)
                            testBoard = Functions.MakeMove(
                                testBoard, nextY, nextX, player)
                            bestPred = out.eval(feed_dict={
                                inp:
                                Functions.boardToNN(testBoard, player)
                            })  # get the output of the move

                        winLoss[0][
                            0] = bestPred * discountRate  # label = output * discount rate
                        gameLabelArray = np.concatenate(
                            (gameLabelArray, winLoss),
                            axis=1)  # add the label to the list of labels
                        board = Functions.MakeMove(board, nextY, nextX,
                                                   player)  # update the board
                        nnBoard = Functions.boardToNN(board, player)
                        if (Functions.GameOver(board) == 1
                            ):  # if the game is over
                            result = Functions.BlackWhiteCount(
                                board)  #sum(nnBoard)#find the winner
                            winLoss[0][0] = result
                            #                            if(result>0):
                            #                                winLoss[0][0] = 1
                            #                            elif(result==0):
                            #                                winLoss[0][0] = 0
                            #                            elif(result<0):
                            #                                winLoss[0][0] = -1
                            # final label is equal to the winner
                            gameLabelArray = np.concatenate(
                                (gameLabelArray, winLoss),
                                axis=1)  # add the label to the list of labels
                            boardArray = np.concatenate(
                                (boardArray, nnBoard),
                                axis=1)  # add the board to the list of boards
                            #print(winLoss[0][0])
                        else:
                            player = Functions.nextPlayer(
                                board, player)  # find who plays next
                            boardArray = np.concatenate(
                                (boardArray, nnBoard),
                                axis=1)  # add the board to the list of boards

                    while ((player2 == "Rand" and player == 2)
                           and Functions.GameOver(board)
                           == 0):  # if player 2 is random

                        movesAvailable = Functions.MovesAvailable(
                            board, player)  # all available moves
                        if (movesAvailable):
                            # pick and play a move at random
                            numPicked = randint(0, len(movesAvailable) - 1)
                            movePicked = movesAvailable[numPicked]
                            x = int(movePicked[1])
                            y = int(movePicked[0])
                            board = Functions.MakeMove(board, y, x, player)
                            # add the resulting position to the board array and the label array
                            nnBoard = Functions.boardToNN(board, player)
                            bestPred = out.eval(feed_dict={inp: nnBoard})
                            winLoss[0][0] = bestPred * discountRate
                            gameLabelArray = np.concatenate(
                                (gameLabelArray, winLoss), axis=1)
                            if (Functions.GameOver(board) == 1
                                ):  # if the game is over
                                result = Functions.BlackWhiteCount(
                                    board)  #sum(nnBoard)#find the winner
                                winLoss[0][0] = result
                                #                                if(result>0):
                                #                                    winLoss[0][0] = 1
                                #                                elif(result==0):
                                #                                    winLoss[0][0] = 0
                                #                                elif(result<0):
                                #                                    winLoss[0][0] = -1
                                # final label is equal to the winner
                                gameLabelArray = np.concatenate(
                                    (gameLabelArray, winLoss), axis=1
                                )  # add the label to the list of labels
                                boardArray = np.concatenate(
                                    (boardArray, nnBoard), axis=1
                                )  # add the board to the list of boards

                        if (Functions.GameOver(board) == 0):
                            player = Functions.nextPlayer(
                                board, player)  # find who plays next
                            boardArray = np.concatenate(
                                (boardArray, nnBoard),
                                axis=1)  # add the board to the list of boards

                    while ((player2 == "Network" and player == 2)
                           and Functions.GameOver(board) == 0):
                        nextX = 0
                        nextY = 0
                        movesAvailable = Functions.MovesAvailable(
                            board, player)  # all available moves
                        bestPred = 0
                        j = 0
                        if (
                                random() > expRate
                        ):  # most of the time the network will play the move with the highest output
                            while (j <= len(movesAvailable) - 1
                                   and movesAvailable
                                   ):  # for each move in movesAvailable
                                moveTest = movesAvailable[j]
                                x = int(moveTest[1])
                                y = int(moveTest[0])
                                testBoard = Functions.BoardCopy(board)
                                testBoard = Functions.MakeMove(
                                    testBoard, y, x, player)

                                pred = out.eval(feed_dict={
                                    inp:
                                    Functions.boardToNN(testBoard, player)
                                })  # get the output of an available move

                                #finds the move with the lowest output, output of -1 means player 2 is guaranteed to win
                                if (j == 0):
                                    bestPred = pred
                                    nextX = x
                                    nextY = y
                                else:
                                    if (pred < bestPred):
                                        bestPred = pred
                                        nextX = x
                                        nextY = y
                                j += 1
                        elif (
                                movesAvailable
                        ):  # sometimes the network will play a move completely randomly to better explore all possible moves
                            move = movesAvailable[randint(
                                0,
                                len(movesAvailable) - 1)]
                            nextX = int(move[1])
                            nextY = int(move[0])
                            testBoard = Functions.BoardCopy(board)
                            testBoard = Functions.MakeMove(
                                testBoard, nextY, nextX, player)
                            bestPred = out.eval(feed_dict={
                                inp:
                                Functions.boardToNN(testBoard, player)
                            })  # get the output of the move

                        winLoss[0][0] = bestPred * discountRate  # label = output * discount rate
                        gameLabelArray = np.concatenate(
                            (gameLabelArray, winLoss),
                            axis=1)  # add the label to the list of labels
                        board = Functions.MakeMove(board, nextY, nextX,
                                                   player)  # update the board
                        nnBoard = Functions.boardToNN(board, player)
                        if (Functions.GameOver(board) == 1
                            ):  # if the game is over
                            result = Functions.BlackWhiteCount(
                                board)  #sum(nnBoard)#find the winner
                            winLoss[0][0] = result
                            #                            if(result>0):
                            #                                winLoss[0][0] = 1
                            #                            elif(result==0):
                            #                                winLoss[0][0] = 0
                            #                            elif(result<0):
                            #                                winLoss[0][0] = -1
                            # final label is equal to the winner
                            gameLabelArray = np.concatenate(
                                (gameLabelArray, winLoss),
                                axis=1)  # add the label to the list of labels
                            boardArray = np.concatenate(
                                (boardArray, nnBoard),
                                axis=1)  # add the board to the list of boards
                            #print(winLoss[0][0])
                        else:
                            player = Functions.nextPlayer(
                                board, player)  # find who plays next
                            boardArray = np.concatenate(
                                (boardArray, nnBoard),
                                axis=1)  # add the board to the list of boards

                    if (Functions.GameOver(board) == 1):  #if the game ends
                        concArray = np.copy(
                            gameLabelArray[0]
                            [1:])  # copy all the data from the array of labels
                        size = len(concArray)
                        concArray = np.reshape(concArray, (1, size))
                        """
                        reshape the array and remove the first value, 
                        this means the label for the first board state is equal to the output of the next move * the discount rate
                        """
                        labelArray = np.concatenate((labelArray, concArray),
                                                    axis=1)
                        gamePlaying = 0
                        Winner = Functions.BlackWhiteCount(
                            board)  #sum(nnBoard)
                        if (Winner > 0):
                            batchp1wins += 1
                            p1wins += 1
                        elif (Winner < 0):
                            batchp2wins += 1
                            p2wins += 1
                s += 1
            print("batch: ", i, " out of ", batches)

            print("player 1 (", player1, ") wins ", batchp1wins)
            print("player 2 (", player2, ") wins ", batchp2wins)
            """
            the following code flips the training data since a position where player 2 is guaranteed to win 
            would be a position where player 1 is guaranteed to win if every piece was reversed
            """
            labelArrayOpp = Functions.ReverseArray(labelArray)
            boardArrayOpp = Functions.ReverseArray(boardArray)
            labelArray = np.concatenate((labelArray, labelArrayOpp), axis=1)
            boardArray = np.concatenate((boardArray, boardArrayOpp), axis=1)

            #trains the neural network
            _, boardCost = sess.run([optimiser, cost],
                                    feed_dict={
                                        inp: boardArray,
                                        label: labelArray
                                    })
            print(boardCost)
            #savePath = saver.save(sess, modelPath)# saves the updated network
            end = time.time()
            print("batch time(secs) ", end - start)

        i += 1

    progend = time.time()
    print("total games: ", batches * batchSize)
    print("player 1 (", player1, ") wins ", p1wins)
    print("player 2 (", player2, ") wins ", p2wins)
    print("total time(secs) ", progend - progstart)
Esempio n. 27
0
import MachineLearning as ML

if __name__ == '__main__':

    SVM = ML.ML_SVM(True)

    # Parameters for the discharge search
    diaAnalizarIni = '2016-01-01 00:00:00'
    diaAnalizarFin = '2016-04-01 00:00:00'
    # coordenadaAnalizar = '-57.606765,-25.284659'  # Asuncion2
    coordenadaAnalizar = '-57.58762493212727,-25.362657878768985'  # Asuncion2
    # coordenadaAnalizar = '-54.842809,-25.459519' # Ciudad del Este Aeropuerto Guarani
    # coordenadaAnalizar = '-55.873211,-27.336775' # Encarnacion - Playa San Jose
    tiempoIntervalo = 10  # minutes
    diametroAnalizar = '45000'  # in meters

    SVM.RecorrerYGenerar(diaAnalizarIni, diaAnalizarFin, coordenadaAnalizar,
                         tiempoIntervalo, diametroAnalizar)
Esempio n. 28
0
# Imports assumed by this example: numpy/pandas, the scikit-learn estimators
# used below, and the project's MachineLearning module (imported here as mL);
# generate_features is a project helper defined elsewhere.
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import BernoulliNB, GaussianNB, MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

import MachineLearning as mL


def main():
    # Set seed
    np.random.seed(0)

    # Create the training/test set(s) from file(s)
    train = pd.read_csv("data/all_visits_practice_2.csv")

    # Preliminary data diagnostics
    mL.describe_data(data=train, describe=True, info=True, value_counts=["ONOFF", "NP3BRADY"],
                     description="PRELIMINARY DATA DIAGNOSTICS:")

    # Encode EVENT_ID to numeric
    mL.clean_data(data=train, encode_man={"EVENT_ID": {"SC": 0, "V04": 4, "V06": 6, "V10": 10}})

    # Choose On or Off
    train = train[train["ONOFF"] == 0]

    # Remove the class with only a single sample
    train = train[train.NP3BRADY != 4]

    # Predictors for the model
    predictors = ["TIME_PASSED", "VISIT_NOW", "CAUDATE_R", "CAUDATE_L", "PUTAMEN_R", "PUTAMEN_L",
                  "SCORE_NOW"]

    # Target for the model
    target = "SCORE_NEXT"

    # Generate new features
    train = generate_features(data=train, predictors=predictors, target=target, id_name="PATNO", score_name="NP3BRADY",
                              visit_name="EVENT_ID")

    # Value counts for EVENT_ID after feature generation
    mL.describe_data(data=train, info=True, describe=True, value_counts=["VISIT_NOW", "SCORE_NEXT"],
                     description="AFTER FEATURE GENERATION:")

    # Univariate feature selection
    mL.describe_data(data=train, univariate_feature_selection=[predictors, target])

    # Algs for model
    algs = [RandomForestClassifier(n_estimators=1000, min_samples_split=50, min_samples_leaf=2, oob_score=True),
            LogisticRegression(),
            SVC(probability=True),
            GaussianNB(),
            MultinomialNB(),
            BernoulliNB(),
            KNeighborsClassifier(n_neighbors=25),
            GradientBoostingClassifier(n_estimators=10, max_depth=3)]

    # Alg names for model
    alg_names = ["Random Forest",
                 "Logistic Regression",
                 "SVM",
                 "Gaussian Naive Bayes",
                 "Multinomial Naive Bayes",
                 "Bernoulli Naive Bayes",
                 "kNN",
                 "Gradient Boosting"]

    # Parameters for grid search
    grid_search_params = [{"n_estimators": [50, 500, 1000],
                           "min_samples_split": [25, 50, 75],
                           "min_samples_leaf": [2, 15, 25, 50]}]

    # Ensemble
    ens = mL.ensemble(algs=algs, alg_names=alg_names,
                      ensemble_name="Weighted ensemble of RF, LR, SVM, GNB, KNN, and GB",
                      in_ensemble=[True, True, True, True, False, False, True, True], weights=[3, 2, 1, 3, 1, 3],
                      voting="soft")

    # Add ensemble to algs and alg_names
    algs.append(ens["alg"])
    alg_names.append(ens["name"])

    # Display ensemble metrics
    mL.metrics(data=train, predictors=predictors, target=target, algs=algs, alg_names=alg_names,
               feature_importances=[True], base_score=[True], oob_score=[True],
               cross_val=[True, True, True, True, True, True, True, True, True],
               split_accuracy=[True, True, True, True, True, True, True, True, True],
               split_classification_report=[False, False, False, False, False, False, False, False, True],
               split_confusion_matrix=[False, False, False, False, False, False, False, False, True])
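
For reference, the weighted soft-voting ensemble that mL.ensemble builds here can presumably be expressed with scikit-learn's VotingClassifier; a minimal sketch (not the wrapper's actual code) using the same members and weights as above:

from sklearn.ensemble import (GradientBoostingClassifier, RandomForestClassifier,
                              VotingClassifier)
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

# Soft voting averages the predicted class probabilities, weighted per estimator.
voting_ensemble = VotingClassifier(
    estimators=[("rf", RandomForestClassifier(n_estimators=1000)),
                ("lr", LogisticRegression()),
                ("svm", SVC(probability=True)),
                ("gnb", GaussianNB()),
                ("knn", KNeighborsClassifier(n_neighbors=25)),
                ("gb", GradientBoostingClassifier(n_estimators=10))],
    voting="soft",
    weights=[3, 2, 1, 3, 1, 3])
# voting_ensemble.fit(X, y); voting_ensemble.predict_proba(X_new)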
Esempio n. 29
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Dec  7 15:15:35 2018

@author: aantoniadis
"""

import New_data
import MachineLearning

######## SETTINGS ##############

regression = 'ridge'  # choose the ML model: linear, ridge, ridgecv, multitasklasso, multitaskelasticnet  (Recommended: ridge)
do_rv_cor = 'yes'  # choose 'yes' if you want to do rv correction to the spectra or 'no' if they are already corrected
########################

New_data.EWmeasurements(do_rv_cor)

MachineLearning.ML(regression)
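
A minimal sketch of how the regression setting above might map onto scikit-learn estimators (an assumption; MachineLearning.ML's real internals may differ):

from sklearn.linear_model import (LinearRegression, MultiTaskElasticNet,
                                  MultiTaskLasso, Ridge, RidgeCV)

# Hypothetical lookup table for the `regression` string.
MODELS = {
    'linear': LinearRegression(),
    'ridge': Ridge(),                      # recommended default above
    'ridgecv': RidgeCV(),                  # ridge with built-in alpha selection
    'multitasklasso': MultiTaskLasso(),
    'multitaskelasticnet': MultiTaskElasticNet(),
}
model = MODELS[regression]
# model.fit(X_train, y_train); model.predict(X_test)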
Esempio n. 30
0
import time
from datetime import datetime, timedelta

import moviepy.editor as mpy
import numpy as np
import pandas as pd
from scipy.spatial import ConvexHull

import MachineLearning as ML
from util import DatabaseConnection as db
from util import PlotData as plt

# Default settings
writeAnalisis = True  # Whether to write a .csv with the conclusion and summary of the analysis

if __name__ == '__main__':

    SVM = ML.ML_SVM(False)

    inicio_de_tiempo = time.time()
    # TEST ANALYSIS DATA
    diaAnalizarIni = datetime.strptime('2016-01-25 14:00:00',
                                       '%Y-%m-%d %H:%M:%S')
    diaAnalizarFin = datetime.strptime('2016-01-25 15:30:00',
                                       '%Y-%m-%d %H:%M:%S')
    # coordenadaAnalizar = '-57.606765,-25.284659'  # Asuncion
    coordenadaAnalizar = '-55.873211,-27.336775'  # Encarnacion - Playa San Jose

    tiempoIntervalo = 10  # minutes
    # REAL-TIME ANALYSIS DATA

    # diaAnalizarIni = datetime.now() - timedelta(minutes=15)
    # diaAnalizarFin = datetime.now()
Esempio n. 31
0
def machine_learning(self):
    print(m.ml_task())
Esempio n. 32
0
import MachineLearning

testing_params = [
    [6.7, 0.76, 0.02, 1.8, 0.078, 6, 12, 0.996, 3.55, 0.63, 9.95],  # C_WINE
    [6.6, 0.61, 0.01, 1.9, 0.08, 8, 25, 0.99746, 3.69, 0.73, 10.5],  # B_WINE
    [10.7, 0.52, 0.38, 2.6, 0.066, 29, 56, 0.99577, 3.15, 0.79,
     12.1],  # A_WINE
    [9.5, 0.72, 0.24, 2.3, 0.07, 21, 47, 0.9962, 3.54, 0.70, 11.3]  # ?
]
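# Each row appears to follow the UCI wine-quality feature order: fixed acidity,
# volatile acidity, citric acid, residual sugar, chlorides, free sulfur dioxide,
# total sulfur dioxide, density, pH, sulphates, alcohol (an assumption inferred
# from the value ranges).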

print("\nTestando valores para modelo SVM Linear")
response = MachineLearning.execute_model('wines_svm_linear', testing_params)

print(response)

print("\nTestando valores para modelo SVM RBF")
response = MachineLearning.execute_model('wines_svm_rbf', testing_params)

print(response)

print("\nTestando valores para modelo Random Forest")
response = MachineLearning.execute_model('wines_rf', testing_params)

print(response)
def run(preprocess_data, cohorts, target, score_name, feature_elimination_n, gen_filename, gen_action,
        gen_updrs_subsets, gen_time, gen_future, gen_milestones, gen_milestone_features_values, gen_slopes,
        predictors_filename, predictors_action, feature_importance_n, grid_search_action, grid_search_results,
        print_results, results_filename, prediction_range, range_target, range_target_description, add_predictors,
        drop_predictors):
    # Initialize empty add_predictors
    if add_predictors is None:
        add_predictors = []

    # Data keys
    data_keys = ["PATNO", "EVENT_ID", "INFODT", "PDDXDT", "SXDT", "BIRTHDT.x", "HAS_PD", target]

    # Target keys
    target_keys = [score_name] if gen_future or gen_slopes else [
        x[0] for x in gen_milestone_features_values] if gen_milestones else []

    # Add target keys to data keys
    data_keys.extend(target_keys)

    # TODO: Create data_preprocessing() function for all of this data preprocessing
    if preprocess_data:
        # Create the data frames from files
        with np.warnings.catch_warnings():
            np.warnings.simplefilter("ignore")
            all_patients = pd.read_csv("data/all_pats.csv")
            all_visits = pd.read_csv("data/all_visits.csv")
            all_updrs = pd.read_csv("data/all_updrs.csv")

        # Enrolled cohorts patients
        pd_control_patients = all_patients.loc[
            (np.bitwise_or.reduce(np.array([(all_patients["APPRDX"] == cohort) for cohort in cohorts]))) & (
                all_patients["ENROLL_STATUS"] == "Enrolled"), "PATNO"].unique()

        # Data for these patients
        pd_control_data = all_visits[all_visits["PATNO"].isin(pd_control_patients)].merge(
                all_updrs[["PATNO", "EVENT_ID", "TOTAL"]], on=["PATNO", "EVENT_ID"], how="left").merge(
                all_patients, on="PATNO", how="left", suffixes=["_x", ""])

        # Only include "off" data
        pd_control_data = pd_control_data[pd_control_data["PAG_UPDRS3"] == "NUPDRS3"]

        # # Merge SC data onto BL data
        # sc_bl_merge = pd_control_data[pd_control_data["EVENT_ID"] == "BL"].merge(
        #     pd_control_data[pd_control_data["EVENT_ID"] == "SC"], on="PATNO", how="left", suffixes=["", "_SC_ID"])
        #
        # # Remove SC data that already belongs to BL
        # pd_control_data.loc[pd_control_data["EVENT_ID"] == "BL"] = sc_bl_merge.drop(
        #     [col for col in sc_bl_merge.columns if col[-6:] == "_SC_ID"], axis=1).values

        # # Initiate progress
        # prog = Progress(0, len(pd_control_data["PATNO"].unique()), "Merging Screening Into Baseline", print_results)
        #
        # # Use SC data where BL is null
        # for patient in pd_control_data["PATNO"].unique():
        #     if not pd_control_data[(pd_control_data["PATNO"] == patient) & (pd_control_data["EVENT_ID"] == "SC")].empty:
        #         for column in pd_control_data.keys():
        #             if (pd_control_data.loc[(pd_control_data["PATNO"] == patient) & (
        #                         pd_control_data["EVENT_ID"] == "BL"), column].isnull().values.all()) and (
        #                     pd_control_data.loc[(pd_control_data["PATNO"] == patient) & (
        #                                 pd_control_data["EVENT_ID"] == "SC"), column].notnull().values.any()):
        #                 pd_control_data.loc[
        #                     (pd_control_data["PATNO"] == patient) & (pd_control_data["EVENT_ID"] == "BL"), column] = \
        #                     max(pd_control_data.loc[
        #                             (pd_control_data["PATNO"] == patient) & (
        #                                 pd_control_data["EVENT_ID"] == "SC"), column].tolist())
        #     # Update progress
        #     prog.update_progress()

        # Remove SC rows
        pd_control_data = pd_control_data[pd_control_data["EVENT_ID"] != "SC"]

        # Drop duplicates based on PATNO and EVENT_ID, keep only first
        pd_control_data = pd_control_data.drop_duplicates(subset=["PATNO", "EVENT_ID"], keep="first")

        # Encode to numeric
        mL.clean_data(data=pd_control_data, encode_auto=["HANDED", "PAG_UPDRS3"], encode_man={
            "EVENT_ID": {"BL": 0, "V01": 1, "V02": 2, "V03": 3, "V04": 4, "V05": 5, "V06": 6, "V07": 7, "V08": 8,
                         "V09": 9, "V10": 10, "V11": 11, "V12": 12}})

        # Create HAS_PD column
        pd_control_data["HAS_PD"] = 0
        pd_control_data.loc[(pd_control_data["APPRDX"] == "PD") | (pd_control_data["APPRDX"] == "GRPD") | (
            pd_control_data["APPRDX"] == "GCPD"), "HAS_PD"] = 1

        # Convert remaining categorical data to binary columns
        numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
        dummy_features = [item for item in pd_control_data.columns.values if item not in list(
                pd_control_data.select_dtypes(include=numerics).columns.values) + drop_predictors]
        pd_control_data = pd.get_dummies(pd_control_data, columns=dummy_features)

        # Controls have missing PDDXDT and SXDT, set to arbitrary date
        pd_control_data.loc[pd_control_data["HAS_PD"] == 0, "PDDXDT"] = pd.to_datetime("1/1/1800")
        pd_control_data.loc[pd_control_data["HAS_PD"] == 0, "SXDT"] = pd.to_datetime("1/1/1800")

        pd_control_data.to_csv("data/PPMI_Clean_Data.csv", index=False)
    else:
        # Use preprocessed data
        pd_control_data = pd.read_csv("data/PPMI_Clean_Data.csv")

        # Convert to correct dtypes
        pd_control_data[["PATNO", "EVENT_ID"]] = pd_control_data[["PATNO", "EVENT_ID"]].apply(pd.to_numeric,
                                                                                              errors="coerce")

    if predictors_action:
        if print_results:
            print("Optimizing Predictors . . .")

        # Drop unused columns
        for column in pd_control_data.keys():
            if (column in drop_predictors) and (column not in data_keys):
                pd_control_data = pd_control_data.drop(column, 1)
    else:
        # Drop unused columns
        pd_control_data = pd_control_data[list(
                set(add_predictors + data_keys) & set(
                        pd_control_data.columns.values.tolist()))]

        if print_results:
            # Print number patients and features before feature elimination
            print("BEFORE FEATURE ELIMINATION: Patients: {}, Features: {}".format(
                    len(pd_control_data[pd_control_data["EVENT_ID"] == 0]),
                    len(pd_control_data.keys())))

    pd_control_data.to_csv("TEST.csv")

    # Perform optimal feature elimination
    if feature_elimination_n is None:
        feature_elimination_n = max([x / 1000 for x in range(25, 1000, 25)],
                                    key=lambda n: feature_row_selection(pd_control_data, n, data_keys, target_keys,
                                                                        True, True))
        if print_results:
            print("\rFeature Elimination N: {}\n".format(feature_elimination_n))

    # Feature/row elimination
    pd_control_data = feature_row_selection(pd_control_data, feature_elimination_n, data_keys, target_keys)

    if (not predictors_action) and print_results:
        # Print number patients and features after feature elimination
        print("AFTER FEATURE ELIMINATION: Patients: {}, Features: {}".format(
                len(pd_control_data[pd_control_data["EVENT_ID"] == 0]),
                len(pd_control_data.keys())))

    # Select all features in the data set
    all_data_features = list(pd_control_data.columns.values)

    pd_control_data.to_csv("testttttt.csv")

    # Generate features (and update all features list)
    train = generate_features(data=pd_control_data, features=all_data_features, filename=gen_filename,
                              action=gen_action, updrs_subsets=gen_updrs_subsets,
                              time=gen_time, future=gen_future, milestones=gen_milestones, slopes=gen_slopes,
                              score_name=score_name, milestone_features_values=gen_milestone_features_values,
                              progress=(not predictors_action) and print_results)

    if (not predictors_action) and print_results:
        # Data diagnostics after feature generation
        mL.describe_data(data=train, describe=True, description="AFTER FEATURE GENERATION:")

    # Parameters for grid search
    grid_search_params = [{"n_estimators": [50, 150, 300, 500, 750, 1000],
                           "min_samples_split": [4, 8, 25, 50, 75, 100],
                           "min_samples_leaf": [2, 8, 15, 25, 50, 75, 100]}]

    # Algs for model
    # Grid search (futures): n_estimators=50, min_samples_split=75, min_samples_leaf=50
    # Futures: n_estimators=150, min_samples_split=100, min_samples_leaf=25
    # Grid search (slopes): 'min_samples_split': 75, 'n_estimators': 50, 'min_samples_leaf': 25
    # Futures: 'min_samples_leaf': 100, 'min_samples_split': 25, 'n_estimators': 50
    # Newest Futures: {'n_estimators': 500, 'min_samples_leaf': 2, 'min_samples_split': 4}
    # TRMR: {'n_estimators': 150, 'min_samples_leaf': 2, 'min_samples_split': 8}
    # Slopes: {'n_estimators': 500, 'min_samples_split': 25, 'min_samples_leaf': 2}
    algs = [
        RandomForestRegressor(n_estimators=500, min_samples_split=4, min_samples_leaf=2,
                              oob_score=True) if target != "SCORE_SLOPE" else RandomForestClassifier(n_estimators=500,
                                                                                                     min_samples_split=25,
                                                                                                     min_samples_leaf=2,
                                                                                                     oob_score=True),
        LogisticRegression(),
        SVC(probability=True),
        GaussianNB(),
        MultinomialNB(),
        BernoulliNB(),
        KNeighborsClassifier(n_neighbors=25),
        GradientBoostingClassifier(n_estimators=10, max_depth=3)]

    # Alg names for model
    alg_names = ["Random Forest",
                 "Logistic Regression",
                 "SVM",
                 "Gaussian Naive Bayes",
                 "Multinomial Naive Bayes",
                 "Bernoulli Naive Bayes",
                 "kNN",
                 "Gradient Boosting"]

    # TODO: Configure ensemble
    # Ensemble
    ens = mL.ensemble(algs=algs, alg_names=alg_names,
                      ensemble_name="Weighted ensemble of RF, LR, SVM, GNB, KNN, and GB",
                      in_ensemble=[True, True, True, True, False, False, True, True],
                      weights=[3, 2, 1, 3, 1, 3],
                      voting="soft")

    # Add ensemble to algs and alg_names
    # algs.append(ens["alg"])
    # alg_names.append(ens["name"])

    if predictors_action:
        # Initialize predictors as all numeric features
        numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
        predictors = list(train.select_dtypes(include=numerics).columns.values)

        # Drop unwanted features from predictors list
        for feature in drop_predictors:
            if feature in predictors:
                predictors.remove(feature)

        # If grid search action, use grid search estimator
        if grid_search_action:
            algs[0] = mL.metrics(data=train, predictors=predictors, target=target, algs=algs, alg_names=alg_names,
                                 scoring="r2" if target != "SCORE_SLOPE" else "accuracy",
                                 grid_search_params=grid_search_params,
                                 output=True)["Grid Search Random Forest"].best_estimator_

        train[predictors + ["PATNO"]].to_csv("test_yay_delete.csv")

        # Get feature importances
        feature_importances = mL.metrics(data=train, predictors=predictors, target=target, algs=algs,
                                         alg_names=alg_names, feature_importances=[True], output=True,
                                         description=None)["Feature Importances Random Forest"]

        # Set important features as predictors
        predictors = [x for x, y in feature_importances if y >= feature_importance_n]

        # Use predictors plus added predictors
        add_predictors.extend(predictors)

        # Output predictors to file
        pd.DataFrame({"predictors": predictors}).to_csv(predictors_filename, index=False)

        # Run with new predictors
        run(False, cohorts, target, score_name, feature_elimination_n, gen_filename, gen_action,
            gen_updrs_subsets, gen_time, gen_future, gen_milestones, gen_milestone_features_values, gen_slopes,
            predictors_filename, False, feature_importance_n, grid_search_action, grid_search_results, print_results,
            results_filename, prediction_range, range_target, range_target_description, add_predictors, drop_predictors)
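        # Second pass: the recursive call above re-runs the pipeline with
        # preprocess_data=False and predictors_action=False, so the predictors
        # selected here (importance >= feature_importance_n, plus add_predictors)
        # are the ones used to train and report the final models in the else branch.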
    else:
        # Get predictors from file
        predictors = add_predictors

        # Create file of training data
        train[predictors].to_csv("data/PPMI_train.csv")

        # Grid search
        if grid_search_action or grid_search_results:
            # Compute grid search
            grid_search = mL.metrics(data=train, predictors=predictors, target=target, algs=algs, alg_names=alg_names,
                                     scoring="r2" if target != "SCORE_SLOPE" else "accuracy",
                                     grid_search_params=grid_search_params, output=True)

            # If grid search action, use grid search estimator
            if grid_search_action:
                algs[0] = grid_search["Grid Search Random Forest"].best_estimator_

        # Univariate feature selection
        # mL.describe_data(data=train, univariate_feature_selection=[predictors, target])

        # Display metrics, including r2 score
        metrics = mL.metrics(data=train, predictors=predictors, target=target, algs=algs, alg_names=alg_names,
                             feature_importances=[True], base_score=[True], oob_score=[True], cross_val=[True],
                             scoring="r2", output=not print_results)
        # feature_dictionary=[data_dictionary, "FEATURE", "DSCR"])

        # Display mean absolute error score
        metrics.update(mL.metrics(data=train, predictors=predictors, target=target, algs=algs, alg_names=alg_names,
                                  cross_val=[True], scoring="mean_absolute_error", description=None,
                                  output=not print_results))

        # Display root mean squared error score
        metrics.update(mL.metrics(data=train, predictors=predictors, target=target, algs=algs, alg_names=alg_names,
                                  cross_val=[True],
                                  scoring="root_mean_squared_error", description=None,
                                  output=not print_results))

        metrics["Cross Validation accuracy Random Forest"] = None

        # Metrics for classification
        if target == "SCORE_SLOPE":
            # Display classification accuracy
            metrics.update(mL.metrics(data=train, predictors=predictors, target=target, algs=algs, alg_names=alg_names,
                                      cross_val=[True], scoring="accuracy", description=None, output=not print_results))

            # Display confusion matrix
            mL.metrics(data=train, predictors=predictors, target=target, algs=algs, alg_names=alg_names,
                       split_confusion_matrix=[True], description=None, output=not print_results)

        # If grid search results, print results
        if grid_search_results:
            print(grid_search["Grid Search String Random Forest"])

        if not print_results:
            # Write results to file
            results = pd.DataFrame(
                    columns=[prediction_range, "description", "base", "oob", "r2", "mes", "rmse", "accuracy",
                             "features",
                             "importances"])
            results.loc[0, prediction_range] = range_target
            results.loc[0, "description"] = range_target_description
            results.loc[0, "base"] = metrics["Base Score Random Forest"]
            results.loc[0, "oob"] = metrics["OOB Score Random Forest"]
            results.loc[0, "r2"] = metrics["Cross Validation r2 Random Forest"]
            results.loc[0, "mes"] = metrics["Cross Validation mean_absolute_error Random Forest"]
            results.loc[0, "rmse"] = metrics["Cross Validation root_mean_squared_error Random Forest"]
            results.loc[0, "accuracy"] = metrics["Cross Validation accuracy Random Forest"]
            feature_importances = list(metrics["Feature Importances Random Forest"])
            results.loc[0, "features"] = feature_importances[0][0]
            results.loc[0, "importances"] = feature_importances[0][1]
            for feature, importance in feature_importances[1:]:
                index = results.index.max() + 1
                results.loc[index, "features"] = feature
                results.loc[index, "importances"] = importance
            results.to_csv(results_filename, mode="a", header=False, index=False)
import pandas
import config
from sklearn.utils import resample
import MachineLearning


def binary_to_char(value):
    return 'N' if value == 0 else 'Y'


ml = MachineLearning.MachineLearning()

data = pandas.read_csv(config.DATA_PATH + 'ortopedia.csv', sep=';')

# Converting the classification column to a character
data.Fusao_de_Vertebras = data.Fusao_de_Vertebras.map(binary_to_char)

# Checking class balance
print(data.Fusao_de_Vertebras.value_counts())

# Splitting the data by class
minor = data[data['Fusao_de_Vertebras'] == 'Y']
major = data[data['Fusao_de_Vertebras'] == 'N']

# Upsampling the class with fewer records
minor_up_sample = resample(minor,
                           replace=True,
                           n_samples=7900,
                           random_state=None)

# Creating a new dataframe with the balanced data
Esempio n. 35
0
import random
import numpy.matlib
import numpy as np
import MachineLearning as ml

test = ml.NeuralNetwork(2, 9, 1)

#Training the model to learn XOR
for i in range(0, 1000):
    test.train(np.array([[1], [0]]), np.array([[1]]))
    test.train(np.array([[0], [1]]), np.array([[1]]))
    test.train(np.array([[1], [1]]), np.array([[0]]))
    test.train(np.array([[0], [0]]), np.array([[0]]))
    print("IH weights: " + str(test.weightIH))
    print("HO weights: " + str(test.weightHO))
    print("Bias H: " + str(test.biasH))
    print("Bias O: " + str(test.biasO))
    print("\n\n")

#Testing the model with XOR
print(test.feedforward(np.array([[1], [0]])))
print(test.feedforward(np.array([[0], [1]])))
print(test.feedforward(np.array([[1], [1]])))
print(test.feedforward(np.array([[0], [0]])))
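
# After enough training iterations the four outputs above should approach the
# XOR truth table, i.e. values close to 1, 1, 0, 0 (exact numbers depend on the
# random weight initialisation).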