Code Example #1
def train(Theta, X_train, Y_train, learning_rate, noOfIter):
    train_size = X_train.shape[0]

    z = np.dot(X_train, Theta)
    h = Sigmoid.sigmoid(z)

    J = CostFunction.cost(h, Y_train, train_size)
    Jarr = []
    Jarr = np.hstack((Jarr, J))

    for i in range(noOfIter):
        dTheta = 1 / train_size * np.dot((h - Y_train).T, X_train).T
        #print(dTheta.shape)
        Theta = Theta - learning_rate * dTheta
        z = np.dot(X_train, Theta)
        h = Sigmoid.sigmoid(z)
        J = CostFunction.cost(h, Y_train, train_size)
        Jarr = np.hstack((Jarr, J))
        print(J)

    plotGraph = False
    if plotGraph:
        PlotData.plotGraph(Jarr, noOfIter)

    return Theta, J
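
The Sigmoid and CostFunction helpers that train() calls are not part of this listing. A minimal sketch of what they presumably compute (the logistic function and the mean cross-entropy cost); the project's actual helpers may differ:

import numpy as np

# Hypothetical stand-ins for Sigmoid.sigmoid and CostFunction.cost,
# assuming standard logistic regression.
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def cost(h, Y, train_size):
    # mean cross-entropy over the training examples
    return -np.sum(Y * np.log(h) + (1 - Y) * np.log(1 - h)) / train_size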
Code Example #2
def main():

    if '-h' in sys.argv:
        print_usage_message()
        exit()

    vectorFileName = fn.create_prof_vect_name(sys.argv)
    corrFileName = fn.create_correlations_name(sys.argv)

    if not os.path.exists(corrFileName):
        tokenVects = read.word_vects(vectorFileName)
        if tokenVects is None:
            print("Specified vector file not found.")
            print("To create vectors use 'createProfVectors.py'")
            exit()
        ratingVect = read.overall_rating_vect(vectorFileName)
        vocabVect = read.vocab_from_vect_file(vectorFileName)
        corrTups = stat.find_correlations(tokenVects, ratingVect, vocabVect)
        write.token_correlations(corrTups, corrFileName)
    else:
        corrTups = read.token_correlations(corrFileName)

    corrPlotFileName = None
    if '-save' in sys.argv:
        corrPlotFileName = fn.create_correlations_plot_name(sys.argv)

    # Plot correlations
    plot.tuple_pair_score_correlation(
        corrTups,
        title=plot.create_token_pair_score_correlation_name(sys.argv),
        saveFile=corrPlotFileName)
Code Example #3
File: GUI.py Project: samkohn/Geiger-Counter
    def getCountRateHist(self):
        '''Plots a histogram of the count rate. The number of bins is the
        number of bins in the histogram, and the sample length is how long
        the count rate is averaged over (equivalent to "sample length" for
        the count rate vs. time graph).'''

        #check if data has been imported. if not, give a warning
        if self.dataSet:

            #ask for number of bins for histogram
            labelText = "Enter the number of bins"
            numBins = tksd.askinteger("Count Rate Histogram", labelText, parent=self.root, minvalue=1)
            if not numBins:
                return

            #ask for length of sample to calculate count rate
            labelText = "Enter the sample length (seconds)"
            sampleLength = tksd.askfloat("Sample Length", labelText, parent=self.root, minvalue=0)
            if not sampleLength:
                return

            #plot histogram in matplotlib
            pd.plotHistOfCountRates(self.dataSet, sampleLength, numBins)

        else:
            self.showImportAlert()
Code Example #4
def CompareDataPerformance(df, names, ptitle="", ModelIndex=0, Epochs=10):
    print("Run for: ", ptitle)
    Train, Test = pf.SplitData(df)
    XTrain, YTrain = pf.TrainandTarget(Train, names)
    XTest, YTest = pf.TrainandTarget(Test, names)
    p_model, p_history = tam.BuildTrainSeqModel(ModelIndex, Epochs, XTrain,
                                                YTrain, XTest, YTest)
    plots = [
        pld.PlotPerformance(p_history.history, ptitle + str(ModelIndex)),
        pld.ROCCurve(XTest, YTest, p_model, ptitle + str(ModelIndex))
    ]
    pld.SavePlots(plots, ptitle + "MI" + str(ModelIndex) + ".pdf")
Code Example #5
def capital_gains_lift(source):
    """
    Computes the capital gains lift in top income percentages over time (line chart)
    """
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        ("Top 10% income share-including capital gains",
         "Top 10% income share"),
        ("Top 5% income share-including capital gains", "Top 5% income share"),
        ("Top 1% income share-including capital gains", "Top 1% income share"),
        ("Top 0.5% income share-including capital gains",
         "Top 0.5% income share"),
        ("Top 0.1% income share-including capital gains",
         "Top 0.1% income share"),
        ("Top 0.05% income share-including capital gains",
         "Top 0.05% income share"),
    )
    source = list(dataset)
    series = [
        md.delta(gds.timeseries(source, a), gds.timeseries(source, b))
        for a, b in columns
    ]
    return pld.linechart(series,
                         labels=list(col[1] for col in columns),
                         title="U.S. Capital Gains Income Lift",
                         ylabel="Percentage Difference")
Code Example #6
File: GUI.py Project: samkohn/Geiger-Counter
    def showThresholdGraph(self):
        '''Shows a graph of the .wav amplitudes so that the user can figure out a good threshold'''
        #import the first 5 seconds of the .wav file

        #prompt for file name
        filename = tkfd.askopenfilename(title="Pick your .wav file", initialdir=".", parent=self.root)
        if not filename:
            return

        rate, wavData = ad.DataSet.readWaveFile(filename)
        #get the first 5 seconds of data
        numEntriesFor5Sec = rate * 5
        first5Sec = wavData[1:numEntriesFor5Sec]
        
        #plot data
        pd.plot(first5Sec, "Calibrate the Threshold!")
Code Example #7
File: GUI.py Project: samkohn/Geiger-Counter
    def getIntervals(self):
        '''Plots a histogram of the time interval length between consecutive
        counts.'''

        #check if data has been imported. if not, give a warning
        if self.dataSet:

            #ask for the number of bins for the histogram
            labelText = "Enter the desired number of bins in the histogram"
            numBins = tksd.askinteger("Intervals Histogram", labelText, parent=self.root, minvalue=1)
            if not numBins:
                return

            #plot the histogram in matplotlib
            pd.plotHistOfIntervals(self.dataSet, numBins)

        else:
            self.showImportAlert()
Code Example #8
File: GUI.py Project: samkohn/Geiger-Counter
    def getCountRate(self):
        '''Plots the count rate as a function of time. The rates are calculated
        for each "bin" (i.e. independent sample)'''

        #Check if data has been imported. if not, give a warning
        if self.dataSet:

            #prompt for desired bin spacing
            labelText = "Enter the desired sample length in seconds"
            binSpacing = tksd.askfloat("Count Rate", labelText, parent=self.root, minvalue=0, initialvalue=1)
            if not binSpacing:
                return

            #plot the count rate in matplotlib
            pd.plotCountRate(self.dataSet, binSpacing)
            
        else:
            #say that you need to import data first
            self.showImportAlert()
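
The tksd and tkfd aliases used throughout these GUI snippets are not defined in the excerpts; presumably they point at tkinter's dialog modules (and pd at the project's PlotData module). A minimal sketch of the assumed imports plus a standalone prompt:

import tkinter as tk
import tkinter.simpledialog as tksd   # askinteger / askfloat dialogs
import tkinter.filedialog as tkfd     # askopenfilename dialog
# import PlotData as pd               # project-local plotting module (assumed)

root = tk.Tk()
root.withdraw()  # hide the empty main window, keep only the dialogs
numBins = tksd.askinteger("Count Rate Histogram", "Enter the number of bins",
                          parent=root, minvalue=1)
print(numBins)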
Code Example #9
def main():

    if '-h' in sys.argv:
        print_usage_message()
        exit()

    stmr = None
    stopwords = None
    if '-ss' in sys.argv:
        stmr = LancasterStemmer()
        stopwords = read.stopwords(stmr)

    countNames = fn.create_token_count_names(sys.argv)
    rawTokenCountName = countNames[0]
    revTokenCountName = countNames[1]
    profTokenCountName = countNames[2]

    rawTokens = read.token_count(rawTokenCountName, True)
    revTokens = read.token_count(revTokenCountName, True)
    profTokens = read.token_count(profTokenCountName, True)

    if rawTokens is None or revTokens is None or profTokens is None:
        profTokenDict = grab_prof_token_dict(stopwords, stmr)

        if rawTokens is None:
            rawTokens = grab_token_count(profTokenDict, count.num_tokens,
                                         rawTokenCountName)
        if revTokens is None:
            revTokens = grab_token_count(profTokenDict,
                                         count.num_reviews_with_token,
                                         revTokenCountName)

        if profTokens is None:
            profTokens = grab_token_count(profTokenDict,
                                          count.num_profs_with_token,
                                          profTokenCountName)

    plotName = create_plot_name()
    plotFileName = None
    if '-save' in sys.argv:
        plotFileName = fn.create_count_plot_name(sys.argv)

    plot.token_counts(rawTokens, revTokens, profTokens, plotFileName, plotName)
Code Example #10
 def _Flip_X_Axis_fired(self):
     global files_selected
     for ifile in files_selected.file_list:
         this_plot = jpl.Plotting(plot_info={'save_file': ifile})
         if not os.path.isfile(this_plot.PickleFile):
             print('FNF:', this_plot.PickleFile)
             pass
         this_plot.LoadPickle(DefWipe=False)
         this_plot.Flip_X_Axis()
         this_plot.PlotAll()
         this_plot.close_fig()
Code Example #11
def percent_income_share(source):
    """Create Income Share chart"""
    columns = (
        "Top 10% income share",
        "Top 5% income share",
        "Top 1% income share",
        "Top 0.5% income share",
        "Top 0.1% income share",
    )
    source = list(gds.getDataSetUsingCSV(source))
    return pld.linechart([gds.timeseries(source, col) for col in columns],
                         labels=columns,
                         title="U.S. Percentage Income Share",
                         ylabel="Percentage")
Code Example #12
def mean_normalized_percent_income_share(source):
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        "Top 10% income share",
        "Top 5% income share",
        "Top 1% income share",
        "Top 0.5% income share",
        "Top 0.1% income share",
    )
    source = list(dataset)
    return pld.linechart(
        [md.normalize(gds.timeseries(source, col)) for col in columns],
        labels=columns,
        title="Mean Normalized U.S. Percentage Income Share",
        ylabel="Percentage")
Code Example #13
def UpdateFileList(plot_info, rc_params, file_list, window_size=None):
    for ifile in file_list:
        this_plot = jpl.Plotting(plot_info={'save_file': ifile})
        if not os.path.isfile(this_plot.PickleFile):
            print('FNF:', this_plot.PickleFile)
            pass
        else:
            print('Updating:', this_plot.PickleFile)
        if window_size is not None:
            this_plot.LoadPickle(DefWipe=False, ForceWindowSize=window_size)
        else:
            this_plot.LoadPickle(DefWipe=False)

        this_plot.UpdateInfo(plot_info)
        pl.rcParams.update(rc_params)
        this_plot.PlotAll()
        this_plot.close_fig()
Code Example #14
def income_composition(source):
    """
    Compares income composition
    """
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        "Top 10% income composition-Wages, salaries and pensions",
        "Top 10% income composition-Dividends",
        "Top 10% income composition-Interest Income",
        "Top 10% income composition-Rents",
        "Top 10% income composition-Entrepreneurial income",
    )
    source = list(dataset)
    labels = ("Salary", "Dividends", "Interest", "Rent", "Business")
    return pld.stackedarea([gds.timeseries(source, col) for col in columns],
                           labels=labels,
                           title="U.S. Top 10% Income Composition",
                           ylabel="Percentage")
Code Example #15
def average_incomes(source):
    """
    Compares average incomes across the top income percentiles
    """
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        "Top 10% average income",
        "Top 5% average income",
        "Top 1% average income",
        "Top 0.5% average income",
        "Top 0.1% average income",
        "Top 0.05% average income",
    )
    source = list(dataset)
    return pld.linechart([gds.timeseries(source, col) for col in columns],
                         labels=columns,
                         title="U.S. Average Income",
                         ylabel="2008 US Dollars")
Code Example #16
def average_top_income_lift(source):
    """
    Compares top percentage avg income over total avg
    """
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        ("Top 10% average income", "Top 0.1% average income"),
        ("Top 5% average income", "Top 0.1% average income"),
        ("Top 1% average income", "Top 0.1% average income"),
        ("Top 0.5% average income", "Top 0.1% average income"),
        ("Top 0.1% average income", "Top 0.1% average income"),
    )
    source = list(dataset)
    series = [
        md.delta(gds.timeseries(source, a), gds.timeseries(source, b))
        for a, b in columns
    ]
    return pld.linechart(series,
                         labels=list(col[0] for col in columns),
                         title="U.S. Income Disparity",
                         ylabel="2008 US Dollars")
Code Example #17
File: main_six.py Project: karagg/yangmintu
"""局部加权线性回归"""
import numpy as np
import PlotData as PD
import regression_lwlr as lw
from plot_lwlr import plot_lwlr
from hold_out import hold_out3
rng=np.random.RandomState(0)
X=10*rng.rand(120)
def model(x):
    y=2*x-5+rng.randn(120)+1.8*np.sin(3*x)
    return y
y=model(X)#随机产生120个数据
print(PD.plot1(X,y))#初步数据可视化
X1=X.copy()
y1=y.copy()
x1_test=X1[80:]#此处数据是作为数据可视化用的,数据可视化要用一维数组
y1_test=y1[80:]
x1_train=X1[:80]
y1_train=y1[:80]
#print(ya)
X=X.reshape(-1,1)
m=len(y)#获取原特征矩阵的行数
ones=np.ones(m).reshape(-1,1)
X=np.hstack([ones,X])#特征矩阵中合并一个x0矩阵,x0初始为1
print(X)#输出特征数组
y=y.reshape(-1,1)
"""对数据进行分割,暂时分三分之二为训练集,三分之一为测试集,设定多个k值,通过数据可视化查看拟合情况然后选取最后k值
   进行留出集验证评估模型各性能指标"""
X_train=X[:80,:]#训练集和测试集分割
y_train=y[:80,:]
X_test=X[80:,:]
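
The regression_lwlr module imported above is not included in this listing. For orientation only, a minimal sketch of the standard locally weighted fit it presumably implements (Gaussian kernel weights plus a weighted normal-equation solve); the project's actual code may differ:

import numpy as np

def lwlr_point(x_query, X, y, k=1.0):
    # Hypothetical sketch: predict y at one query row by weighting every
    # training row with a Gaussian kernel and solving the weighted normal equation.
    m = X.shape[0]
    W = np.eye(m)
    for i in range(m):
        diff = X[i] - x_query
        W[i, i] = np.exp(-(diff @ diff) / (2.0 * k ** 2))
    theta = np.linalg.pinv(X.T @ W @ X) @ (X.T @ W @ y)
    return (x_query @ theta).item()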
Code Example #18
File: Control.py Project: cloudy/higgs-ml-challenge
def graphdata(dataframe):
	datasignal, wsignal = pf.ProcessFrame(dataframe, dataframe.Label, 1)
	databack, wback = pf.ProcessFrame(dataframe, dataframe.Label, 0)
	figs = [pld.PlotData([datasignal, databack], feat, [wsignal, wback]) for feat in pf.GetVariables(datasignal)]
	pld.SavePlots(figs)
	pld.ClosePlots(figs)
Code Example #19
import os

import EpochVideo
import PlotData
import ProcessData
import EpochData

# Code initialization 
if __name__ == '__main__':
    # Parameters
    main_path = r'C:\Users\HeLab\Documents\Ruijia\Project\EyeTracking\Data\v1\624-1339450-OKR'
    
    info_dir = 'OUT_INFO'
    info_path = os.path.join(main_path, info_dir)
    
    video_dir = 'IN_VIDEO'
    video_name = 'video_2019-11-25-15-55-02.h264'
    video_path = os.path.join(main_path, video_dir, video_name)
    
    data_dir = 'OUT_VIDEO'
    data_file = 'RUN1_2020-01-19-15-16'
    data_path = os.path.join(main_path, data_dir , data_file)
     
    result_dir = 'RESULT'
    result_path = os.path.join(main_path, result_dir, data_file)
    
    # Script execution
    print('\n-------Processing data-------')
    ProcessData.main(main_path, info_path, data_path, result_path)
    print('\n-------Epoching data-------')
    EpochData.main(main_path, info_path, data_path, result_path)
    print('\n-------Plot data-------')
    PlotData.main(main_path, info_path, data_path, result_path)
    print('\n-------Epoching video-------')
    EpochVideo.main(main_path, info_path, video_path, data_path, result_path) 
Code Example #20
File: main_two.py Project: karagg/yangmintu

def make_data(N, err=1.0, rseed=1):
    """Randomly sampled data"""
    rng = np.random.RandomState(rseed)
    X = rng.rand(N, 1)**2
    y = 10 - 1. / (X.ravel() + 0.1)
    if err > 0:
        y += err * rng.randn(N)
    return X, y


X, y = make_data(40)  # generate forty samples
X1 = X.copy()
#print(X)
print(PD.plot1(X, y))  # preliminary visualization, to help choose between power-of-x and root-of-x features
y = y.reshape(-1, 1)
"""Choose the degree: cross-validate a series of root or power features to get the
training- and validation-set costs, compare them to pick a good degree, then
evaluate the model's performance."""
degree = np.arange(1, 7)
vc.linear(X, y, degree, 5)  # cross-validation; the last argument is the number of folds
X = vc.Degree2(4, X)  # after comparing the costs, keep the model suggested by the preliminary plot; root features (Degree2) are used here, power features (Degree) are the alternative
theta, J = gd.grad(X, y, 0.003, 1500)  # compute theta and the cost J
# visualize the prediction curve
x_test1 = np.linspace(0, 1, 300).reshape(-1, 1)
x_test2 = vc.Degree2(4, x_test1)
y_pre1 = np.dot(x_test2, theta)  # compute the y matrix from theta and x
print("Visualizing the fit")
print(PD.plot2(X1, y, x_test1, y_pre1))  # visualize the fit
y_pre = np.dot(X, theta)
# evaluate performance with the better model
mae = MAE(y, y_pre)  # call the MAE function
Code Example #21
\hoffset -1.5cm
\headsep 1.5cm
\parindent 1.2em
\baselineskip 16pt plus 2pt minus 2pt
\begin{document}
\tiny
'''

this_info = pa.Series()
mat_graph_folder = data_dir + 'MatHack/'
mkdir_p(mat_graph_folder)
this_info['save_file'] = mat_graph_folder + 'FitrComp.pdf'
this_info['title'] = r'fitr comp'
this_info['xlabel'] = r'$\sqrt{8t}/\sqrt{8t_{0}}$'
this_info['ylabel'] = r'ratio'
data_plot = jpl.Plotting(plot_info=this_info)
table_out = {}
for iens, ifile in zip(master_ens_list, this_filelist):
    for iblock in block_flags:
        this_file = ifile.replace('.py3p', iblock + '.py3p')
        if os.path.isfile(this_file):
            print('Reading: ', this_file)
            with open(this_file, 'rb') as f:
                fit_data, dump = pik.load(f)
            data_plot = fit_data.PlotVaryFitr(data_plot)
            fit_data.SortChi()
            table_out[fit_data.name] = fit_data.Get_Formatted_Table(
                fmt_latex=True).to_latex(escape=False).replace(
                    '{}', fit_data.name.replace('_', r'\_'))
        else:
            print('FNF: ', this_file)
Code Example #22
# Script used to launch the models
# Author: Cristian Di Pietrantonio


# required imports
from pymodelica import compile_fmu
from pyfmi import load_fmu

import sys
sys.path.append("../../../Utilities")
import os
import PlotData as plt
import numpy as N


model_name="ClosedSystem"

# all ".mo" files in the folder are loaded
model_files = [f for f in os.listdir(".") if ".mo" in f]

#input_object = (['noise', 'failures'], input_function)

model_comp = compile_fmu(model_name, model_files)
model = load_fmu(model_comp)
maxt = 1000
res = model.simulate(start_time=0, final_time=maxt, options={'ncp':1000})

plt.plotData(2, 2, ['s.x', 's.pOpen', 'm.y', 's.riverLoad'], res)
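
PlotData here is a project-local helper rather than matplotlib itself. A minimal sketch of what a plotData(rows, cols, variables, res) like the one called above might do, assuming a pyfmi result object indexable by variable name; the actual helper may differ:

import matplotlib.pyplot as plt

def plotData(n_rows, n_cols, variables, res):
    # Hypothetical sketch: draw each named trajectory from a pyfmi result
    # in an n_rows x n_cols grid of axes.
    time = res['time']
    fig, axes = plt.subplots(n_rows, n_cols, sharex=True)
    for ax, var in zip(axes.ravel(), variables):
        ax.plot(time, res[var])
        ax.set_title(var)
        ax.set_xlabel('time [s]')
    fig.tight_layout()
    plt.show()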
Code Example #23
def TestAuto():
    '''
    testing function for standard autocorrelation analysis
    '''
    def thisFun(*x):
        return x[0]
    def thisDer(*x):
        return [1]

    const = 100
    this_size = 20000
    values = np.random.uniform(size=this_size)

    values2 = np.arange(this_size)/this_size

    values3 = np.random.normal(loc=0.5,scale=0.25,size=this_size)
    val_df = pa.DataFrame()

    # tuple_list = []
    # for ii in range(100):
    #     tuple_list.append(('-1-',ii))
    # for ii in range(400):
    #     tuple_list.append(('-2-',ii))
    # for ii in range(1000):
    #     tuple_list.append(('-3-',ii))
    # for ii in range(500):
    #     tuple_list.append(('-4-',ii))

    tuple_list = []
    for ii in range(this_size//2):
        tuple_list.append(('-1-',ii))
    for ii in range(this_size//2):
        tuple_list.append(('-2-',ii))
    #
    # tuple_list = []
    # for ii in range(this_size):
    #     tuple_list.append(('-1-',ii))

    indicies = pa.MultiIndex.from_tuples(tuple_list,names=['stream','configs'])
    # indicies = range(this_size)
    # val_df = pa.DataFrame()
    # val_df['one'] = pa.Series(values,index=indicies)
    # val_df['two'] = pa.Series(values2,index=indicies)
    # val_df['three'] = pa.Series(values3,index=indicies)
    # def RatFun(one,two,three):
    #     return const*one/(two*three)
    #
    # def RatFunDer(one,two,three):
    #     return [const/(two*three),-const*one/(three*two**2),-const*one/(two*three**2)]

    val_df = pa.DataFrame()
    val_df['one'] = pa.Series(values,index=indicies)
    val_df['two'] = pa.Series(values2,index=indicies)
    val_df['three'] = pa.Series(values3,index=indicies)
    def RatFun(one,two):
        return const*one*two

    def RatFunDer(one,two):
        return [const*two,const*one]

    testdata = AutoCorrelate(Fun=[thisFun,thisDer],name='test_bootstrap_uniform',data=val_df[['one']])
    testdata2 = AutoCorrelate(Fun=[thisFun,thisDer],name='test_bootstrap_arange',data=val_df[['two']])
    testdata3 = AutoCorrelate(Fun=[thisFun,thisDer],name='test_bootstrap_normal',data=val_df[['three']])
    testdatarat = AutoCorrelate(Fun=[RatFun,RatFunDer],name='test_auto_ratio',data=val_df[['one','two']])

    this_info = pa.Series()
    this_info['save_file'] = this_dir+'/TestGraphs/test_Wopt.pdf'
    this_info['title'] = 'Test Auto Graph'
    # this_info['xlims'] = [0,10]
    # this_info['ylims'] = [0,15]
    import PlotData as jpl
    data_plot = jpl.Plotting(plot_info=this_info)
    data_plot = testdata.PlotWopt(data_plot)
    data_plot = testdata2.PlotWopt(data_plot)
    data_plot = testdata3.PlotWopt(data_plot)
    data_plot = testdatarat.PlotWopt(data_plot)
    # data_plot.LoadPickle(DefWipe=False)
    data_plot.PrintData()
    data_plot.PlotAll()

    this_info = pa.Series()
    this_info['save_file'] = this_dir+'/TestGraphs/test_Auto.pdf'
    this_info['title'] = 'Test Auto Graph'
    # this_info['xlims'] = [0,10]
    # this_info['ylims'] = [0,15]
    import PlotData as jpl
    data_plot = jpl.Plotting(plot_info=this_info)
    data_plot = testdata.PlotTauInt(data_plot)
    data_plot = testdata2.PlotTauInt(data_plot)
    data_plot = testdata3.PlotTauInt(data_plot)
    data_plot = testdatarat.PlotTauInt(data_plot)
    # data_plot.LoadPickle(DefWipe=False)
    data_plot.PrintData()
    data_plot.PlotAll()

    return testdata,testdata2,testdata3,testdatarat
Code Example #24
# Major library imports
import numpy as np
import PlotData as jpl

# Enthought library imports
from enable.api import Component, ComponentEditor, ColorTrait
from traits.api import HasTraits, Instance, Float, String, List, Enum, Bool, Int
from traitsui.api import Item, HSplit, VGroup, View

# Chaco imports
from chaco.api import ArrayPlotData, HPlotContainer, Plot
from chaco.api import marker_trait
from chaco.tools.api import PanTool, ZoomTool

data_plot = jpl.TestChaco()
simple_col = ['blue', 'red', 'green', 'purple', 'gold', 'black']


def hex_to_rgb(value):
    if not isinstance(value, str):
        return value
    value = value.lstrip('#')
    lv = len(value)
    return tuple(
        float(int(value[i:i + lv // 3], 16)) / 255.
        for i in range(0, lv, lv // 3))


class BeginFrame(HasTraits):
    """
Code Example #25
data = np.loadtxt('ex1data1.txt', delimiter=",")  # load the data file; the result is a NumPy array
#print(data)  # show the data
X = data[:, 0:-1]  # extract the feature array
y = data[:, -1]  # extract the label array; this slice comes out 1-D
#print(X)
#print(y)
y = y.reshape(-1, 1)  # reshape so the dimensions match the feature array
#print(y_1)
#print(sum(X))
# use the project's own normalization library
h = ns.Nor()
#X=h.S_n_normalize(X,"standard")  # standardize so the data follow a normal distribution
X = h.S_n_normalize(X, "normalize")  # min-max normalize so the data lie between zero and one
print(X)
print("Preliminary data visualization")  # over/underfitting is judged mainly from this plot
print(PD.plot1(X, y))  # visualize the data to check the normalization
"""Outliers were found; use the project's outlier-handling library to deal with them
g=dl.Err()
X=g.S_errvaldeal(X,'mean')
print((PD.plot1(X,y)))  # visualize again to check the result"""
X1 = X
m = X.shape[0]  # number of rows in the original feature matrix
n = X.shape[1]  # number of columns in the original feature matrix
ones = np.ones(m).reshape(-1, 1)
X = np.hstack([ones, X])  # prepend a column of ones (x0 = 1) to the feature matrix
#print(X)
q = int(m * 0.3)
#print(q)
X_train = X[q:, :]  # thirty percent of the samples become the validation set, the rest the training set
y_train = y[q:, :]  # done with slicing
X_val = X[:q, :]
Code Example #26
# Script used to launch the models
# Author: Cristian Di Pietrantonio


# required imports
from pymodelica import compile_fmu
from pyfmi import load_fmu

import sys
sys.path.append("../../../Utilities")
import os
import PlotData as plt
import numpy as N


model_name="ClosedSystem"

# all ".mo" files in the folder are loaded
model_files = [f for f in os.listdir(".") if ".mo" in f]

#input_object = (['noise', 'failures'], input_function)

model_comp = compile_fmu(model_name, model_files)
model = load_fmu(model_comp)
maxt = 10
res = model.simulate(start_time=0, final_time=maxt)

plt.plotData(2, 2, ['process1.myState', 'process2.myState', 's.turn', 'm.y'], res)
Code Example #27
File: main_three.py Project: karagg/yangmintu
data = np.loadtxt('ex1data1.txt', delimiter=",")  # load the data file; the result is a NumPy array
#print(data)  # show the data
X = data[:, 0:-1]  # extract the feature array
y = data[:, -1]  # extract the label array; this slice comes out 1-D
#print(X)
#print(y)
y = y.reshape(-1, 1)  # reshape so the dimensions match the feature array
#print(y_1)
#print(sum(X))
# use the project's own normalization library
h = ns.Nor()
#X=h.S_n_normalize(X,"standard")  # standardize so the data follow a normal distribution
X = h.S_n_normalize(X, "normalize")  # min-max normalize so the data lie between zero and one
print(X)
print("Preliminary data visualization")
print(PD.plot1(X, y))  # visualize the data to check the normalization

X1 = X.copy()
m = X.shape[0]  # number of rows in the original feature matrix
n = X.shape[1]  # number of columns in the original feature matrix
ones = np.ones(m).reshape(-1, 1)
X = np.hstack([ones, X])  # prepend a column of ones (x0 = 1) to the feature matrix
#print(X)
theta = np.zeros((n + 1, 1))  # theta starts as a zero matrix with n + 1 rows
theta = ht.hold_out2(X, y, 0.8, 5)  # hold-out evaluation
print("Usually a large J_train indicates underfitting (high bias); if J_test is much larger than J_train the model is overfitting (high variance)")
#theta=normalEqu(X,y)  # build the model with the better value
#print('theta')
#print(theta)  # show theta

x_test1 = np.linspace(0, 1, 300).reshape(-1, 1)  # plot the fitted curve to check the fit
Code Example #28
 def ShowMacro(self):
     data = sd.load_obj('Macro')
     pd.PlotMacro(data['MoneySupply'], 'M2', '2016-1-1', '2017-11-1',
                  ['m2'])
Code Example #29
def main():

    if '-h' in sys.argv:
        print_usage_message()
        exit()

    vectFileName = fn.create_prof_vect_name(sys.argv, True)
    simMatFileName = fn.create_sim_mat_name(sys.argv)
    predsFileName = fn.create_preds_name(sys.argv)

    print(vectFileName)
    print(simMatFileName)
    print(predsFileName)

    # Grab the ratings vector
    if '-d' in sys.argv:
        ratings = read.difficulty_rating_vect(vectFileName)
    else:
        ratings = read.overall_rating_vect(vectFileName)

    # Assign similarity metric
    sim_f = vp.inverse_euclidean_distance
    if '-cos' in sys.argv:
        sim_f = vp.cosine_similarity
    elif '-pear' in sys.argv:
        sim_f = vp.abs_pearson_correlation

    # Set if weighted or not
    weighted = True
    if '-unweighted' in sys.argv:
        weighted = False

    # Grab predictions or create them if not available
    predictions = read.knn_predictions(predsFileName)
    if predictions is None:

        simMat = read.similarity_matrix(simMatFileName)
        if simMat is None:
            wordVects = read.word_vects(vectFileName)
            if wordVects is None:
                print("Vector file " + vectFileName + " does not exist")
                exit()
            wordVects = vp.process_token_vectors(wordVects, sys.argv)
            simMat = knn.get_similarity_matrix(wordVects, sim_f)
            write.similarity_matrix(simMat, simMatFileName)

        predictions = knn.knn_dataset(ratings, MaxK, simMat, weighted)
        write.knn_predictions(predictions, predsFileName)

    idxToPlot = None

    if '-maxK' in sys.argv:
        maxK = int(sys.argv[sys.argv.index('-maxK') + 1])
        predictions = predictions[:, :maxK]

    pidVect = read.pid_vect(vectFileName)
    singleRevIdxs = vp.pids_to_idxs(pidVect,
                                    read.pids_file(fn.PidsSingleRevFile))
    smallLenIdxs = vp.pids_to_idxs(pidVect,
                                   read.pids_file(fn.PidsSmallRevLenFile))

    plotFileName = None
    if '-save' in sys.argv:
        plotFileName = fn.create_knn_accuracy_plot_name(sys.argv)

    # Output results of the run
    plot.knn_error(
        predictions,
        ratings,
        title=plot.create_knn_error_title(sys.argv),
        idxToPlot=[singleRevIdxs, smallLenIdxs],
        subTitles=[
            "Error with profs with one review",
            "Error with profs with aggregate review " +
            "lengths one std dev above the mean " + "review length or less"
        ],
        saveFile=plotFileName)
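
The similarity metrics selected above (vp.inverse_euclidean_distance, vp.cosine_similarity, vp.abs_pearson_correlation) are not shown in this listing. Minimal sketches of the standard formulas they presumably implement; the project's vp module may differ:

import numpy as np

# Hypothetical sketches of the vp similarity metrics referenced above.
def inverse_euclidean_distance(a, b):
    return 1.0 / (1.0 + np.linalg.norm(a - b))

def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

def abs_pearson_correlation(a, b):
    return abs(np.corrcoef(a, b)[0, 1])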
Code Example #30
# In[]
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import PlotData
import scipy.optimize as op

# In[]
df = pd.read_csv("ex2data1.txt", names=['Exam 1', 'Exam 2', 'Admitted'])
x = df.iloc[:, 0:2].values
y = df.iloc[:, 2:3].values

# In[]
PlotData.plotData(x, y)
theta = np.zeros([1, 3])
ones = np.ones([x.shape[0], 1])
X = np.concatenate((ones, x), axis=1)


# In[]
def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def cost(x, y, theta):
    a = y * np.log(sigmoid(x @ theta.T))
    b = (1 - y) * np.log(1 - sigmoid(x @ theta.T))
    return np.sum(-(a + b) / len(x))


def gradient(x, y, theta, alpha, iters):
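    # Hypothetical completion: the original body is truncated in this listing.
    # A plain batch-gradient-descent loop over the cross-entropy cost above;
    # the original implementation (e.g. via scipy.optimize) may differ.
    cost_history = np.zeros(iters)
    for i in range(iters):
        error = sigmoid(x @ theta.T) - y                    # (m, 1) residuals
        theta = theta - (alpha / len(x)) * (error.T @ x)    # simultaneous update
        cost_history[i] = cost(x, y, theta)
    return theta, cost_history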
Code Example #31
def main():

    if '-h' in sys.argv:
        print_usage_message()
        exit()

    profDicts = read.prof_dicts()

    revLens = st.rev_len_arr(profDicts)
    print("Number of Reviews:", revLens.shape[0])
    print("Mean review length:", revLens.mean())
    print("Std Dev review length:", revLens.std())
    print()

    numRevsProf = st.num_revs_profs(profDicts)
    print("Number of professors:", numRevsProf.shape[0])
    print("Mean num reviews per prof:", numRevsProf.mean())
    print("Std Dev num revews per prof:", numRevsProf.std())
    print()

    profRevLen = st.profs_revs_len(profDicts)
    print("Mean tokens per prof:", profRevLen.mean())
    print("Std Dev tokens per prof:", profRevLen.std())
    print()

    overRats = np.array([prof['rating_overall'] for prof in profDicts],
                        dtype=float)
    diffRats = np.array([prof['rating_difficulty'] for prof in profDicts],
                        dtype=float)

    overRatMean = overRats.mean()
    diffRatMean = diffRats.mean()

    print("Overall ratings mean:", overRatMean)
    print("Overall ratings std dev:", overRats.std())
    print("Difficulty ratings mean:", diffRatMean)
    print("Difficulty ratings std dev:", diffRats.std())
    print()

    overMeanDiff = overRats - overRatMean
    overMeanDiff = np.abs(overMeanDiff)
    diffMeanDiff = diffRats - diffRatMean
    diffMeanDiff = np.abs(diffMeanDiff)

    print("Nieve approach to prediction: Guessing the Mean")
    print("All profs")
    print("Overall absolute error mean:", overMeanDiff.mean())
    print("Overall absolute error std div:", overMeanDiff.std())
    print("Difficulty absolute error mean:", diffMeanDiff.mean())
    print("Difficulty absolute error std div:", diffMeanDiff.std())
    print()

    oneRevPids = set(read.pids_file(fn.PidsSingleRevFile))
    oneOverRats = np.array([
        prof['rating_overall']
        for prof in profDicts if prof['pid'] in oneRevPids
    ],
                           dtype=float)
    oneOverDiff = np.abs(oneOverRats - oneOverRats.mean())

    print("Profs with one review")
    print("One review absolute error mean:", oneOverDiff.mean())
    print("One review absolute error std div:", oneOverDiff.std())
    print()

    smallRevPids = set(read.pids_file(fn.PidsSmallRevLenFile))
    smallOverRats = np.array([
        prof['rating_overall']
        for prof in profDicts if prof['pid'] in smallRevPids
    ],
                             dtype=float)
    smallOverDiff = np.abs(smallOverRats - smallOverRats.mean())

    print("Profs with short reviews")
    print("Small review absolute error mean:", smallOverDiff.mean())
    print("small review absolute error std div:", smallOverDiff.std())
    print()

    save = False
    if '-save' in sys.argv:
        save = True

    plot.plot_word_review_count(revLens, profRevLen, numRevsProf, save=save)
Code Example #32
        logger.info("Getting metadata")
        nodeId, srcId = getRequestParameters("training")


        file = os.path.join(dir, "../run.sh")
        logger.info("creating training input file")
        gen.generate(args.reqId)

        logger.info("training begun")
        call([file, args.reqId, str(nodeId), srcId])

        file = os.path.join(dir, "../test.sh")
        logger.info("evaluation begun")
        call([file, args.reqId, str(nodeId), srcId, emergent_log_file])

        plt.setupTrainingOutput(args.reqId)
        logger.info("Storing output")
        dc.store_correlation(request_id=args.reqId)
        logger.info("Finished.")
        log_end_time("training")

    else:
        logger.info("Started prediction job")
        logger.info("Getting metadata")
        nodeId, srcId = getRequestParameters("prediction")

        file = os.path.join(dir, "../predict.sh")
        logger.info("creating prediction input file")
        genP.generate(args.reqId)
        logger.info("prediction begun")
        call([file, args.reqId, str(nodeId), str(srcId), emergent_log_file])
Code Example #33
# Script used to launch the models
# Author: Cristian Di Pietrantonio


# required imports
from pymodelica import compile_fmu
from pyfmi import load_fmu

import sys
sys.path.append("../../../model_checking/dfs-cristian/PyLib")
import os
import PlotData as plt
import numpy as N


model_name="ClosedSystem"

# all ".mo" files in the folder are loaded
model_files = [f for f in os.listdir(".") if ".mo" in f]

#input_object = (['noise', 'failures'], input_function)

model_comp = compile_fmu(model_name, model_files)
model = load_fmu(model_comp)
maxt = 1000
res = model.simulate(start_time=0, final_time=maxt, options={'ncp':1000})

plt.plotData(2, 2, ['ec.quantX', 's.pOpen', 'm.y', 's.riverLoad'], res)
Code Example #34
X2 = data[:, 1]  # 1-D array
#print(X2)
#print(y)
y = y.reshape(-1, 1)  # reshape to match the feature array; in practice this is the house price
#print(y)
#print(sum(X))
# use the project's own normalization library
h = ns.Nor()
#X=h.S_n_normalize(X,"standard")  # standardize so the data follow a normal distribution
X = h.S_n_normalize(X, "normalize")  # min-max normalize so the data lie between zero and one
#y=h.S_n_normalize(y,"normalize")  # min-max normalize so the data lie between zero and one
print(X)
X1 = X[:, 0]  # 1-D array: the house area
X2 = X[:, 1]  # 1-D array: the house height
#print("Preliminary data visualization")
print(PD.plot3(X1, X2, y.ravel()))  # visualize the data to check the normalization
X1 = X1.reshape(-1, 1)
X2 = X2.reshape(-1, 1)
X = X1 * X2  # total volume of the house
X = vc.Degree(4, X)  # polynomial feature selection
m = len(y)
#ones=np.ones(m).reshape(-1,1)
#X=np.hstack([ones,X])  # prepend a column of ones (x0 = 1) to the feature matrix
#print(X)
"""Use cross-validation on the regularization term to compare cost values, pick the
model with the better penalty, then evaluate the model's performance."""
reg_choose(X, y, numval=10)  # 10-fold cross-validation table of costs for different penalties
print("The table shows that the J values are all fairly large, so the model may be underfitting")
theta = normalEqu_Reg(X, y, 0.0)  # pick the better theta; here the penalty is zero
x_test1 = np.linspace(0, 1, 300).reshape(-1, 1)  # plot the fitted curve to check the fit
x_test3 = x_test1 * x_test1
x_test3 = vc.Degree(4, x_test3)
Code Example #35
 def ShowFundamental(self, year, quarter, area=None, industry=None):
     data = sd.load_obj(str(year) + str(quarter) + 'Qfundamental')
     pd.PlotSingleFundamental(data['basic'], 'profit', area, industry)
     pd.PlotMultiFundamental(data['basic'], 'pe', 'rev', 'totals', 'profit',
                             area, industry)
Code Example #36
        logger.info("Started training job")
        logger.info("Getting metadata")
        nodeId, srcId = getRequestParameters("training")

        file = os.path.join(dir, "../run.sh")
        logger.info("creating training input file")
        gen.generate(args.reqId)

        logger.info("training begun")
        call([file, args.reqId, str(nodeId), srcId])

        file = os.path.join(dir, "../test.sh")
        logger.info("evaluation begun")
        call([file, args.reqId, str(nodeId), srcId, emergent_log_file])

        plt.setupTrainingOutput(args.reqId)
        logger.info("Storing output")
        dc.store_correlation(request_id=args.reqId)
        logger.info("Finished.")
        log_end_time("training")

    else:
        logger.info("Started prediction job")
        logger.info("Getting metadata")
        nodeId, srcId = getRequestParameters("prediction")

        file = os.path.join(dir, "../predict.sh")
        logger.info("creating prediction input file")
        genP.generate(args.reqId)
        logger.info("prediction begun")
        call([file, args.reqId, str(nodeId), str(srcId), emergent_log_file])
Code Example #37
 def ShowPrice(self, ticker):
     data = sd.load_obj(ticker + 'price')
     pd.PlotPrice(data['price'], ticker)
     pd.PlotTick(data['tick'], ticker)
Code Example #38
File: runFFNN.py Project: dobule/ProfessorRatings
def main():

    if '-h' in sys.argv:
        print_usage_message()
        exit()

    vectorFileName = fn.create_prof_vect_name(sys.argv, True)
    tokenVects = read.word_vects(vectorFileName)
    if tokenVects is None:
        print("Could not find token vects")
        print("Use 'createProfVectors.py' to create vectors")
        exit()

    tokenVects = vp.process_token_vectors(tokenVects, sys.argv)

    if '-d' in sys.argv:
        ratings = read.difficulty_rating_vect(vectorFileName)
    else:
        ratings = read.overall_rating_vect(vectorFileName)

    # Create Training and validation sets
    pidVect = read.pid_vect(vectorFileName)

    nonSingleSmallIdxs = ffnn.non_single_small_idxs(pidVect)
    singleIdxs = vp.pids_to_idxs(pidVect, read.pids_file(fn.PidsSingleRevFile))
    smallIdxs = vp.pids_to_idxs(pidVect,
                                read.pids_file(fn.PidsSmallRevLenFile))
    singleSmallIdxs = list(set(singleIdxs).union(set(smallIdxs)))
    singleSmallIdxs.sort()
    singleSmallIdxs = np.array(singleSmallIdxs)

    trainingVects = tokenVects[nonSingleSmallIdxs, :]
    trainingRatings = ratings[nonSingleSmallIdxs]

    validVects = tokenVects[singleSmallIdxs, :]
    validRatings = ratings[singleSmallIdxs]

    print(trainingVects.shape, trainingRatings.shape, validVects.shape,
          validRatings.shape)
    """
  
   xTrain, xValid, yTrain, yValid = train_test_split(tokenVects, ratings,
                                                      test_size=0.3)
   """
    # Select and train model
    if '-deep' in sys.argv:
        model = ffnn.deep_model(tokenVects.shape[1])
    else:
        model = ffnn.shallow_model(tokenVects.shape[1])

    history = model.fit(trainingVects,
                        trainingRatings,
                        epochs=10,
                        batch_size=5,
                        validation_data=(validVects, validRatings))

    plotTitle = plot.ffnn_error_title(sys.argv)
    outfile = None
    if '-save' in sys.argv:
        outfile = fn.create_ffnn_plot_name(sys.argv)

    plot.ffnn_error(history, title=plotTitle, filename=outfile)
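
ffnn.shallow_model and ffnn.deep_model are project-local builders that this listing does not include. Judging from the Keras-style fit() call above, a shallow variant could look roughly like the sketch below; the layer sizes and loss are assumptions, not the project's actual architecture:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

def shallow_model(input_dim):
    # Hypothetical sketch of a single-hidden-layer regressor for the
    # rating-prediction task.
    model = Sequential([
        Dense(64, activation='relu', input_shape=(input_dim,)),
        Dense(1)  # one continuous rating output
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model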