예제 #1
0
def dataGettingEvent(sc):
    """Fetch fresh data, then re-arm the shared scheduler to fire again."""
    print("Fetching data")
    getData.main()

    # Re-queue ourselves so the fetch repeats every 180 s.
    # TODO: raise this interval well above 180 seconds.
    datagettingScheduler.enter(180, 1, dataGettingEvent, (sc,))
예제 #2
0
def main():
    """Run the full ETL pipeline: optionally create the database objects,
    then extract, transform, and load the shipping-lines data."""
    getLogger()
    logging.info("start ETL process")

    # Schema creation is skipped unless explicitly enabled in config.
    if config.CREATE_DATABASE_OBJECTS_FLAG:
        logging.info("create DB")
        databaseUtils.main()

    logging.info("download data")
    getData.main()

    logging.info("prepare data")
    shipping_lines_data = prepareData.get_shipping_lines_data()

    logging.info("load data")
    databaseUtils.load_shipping_lines_table(shipping_lines_data)
    logging.info("end ETL process")
예제 #3
0
def get_data():
    """Load the rb1901 minute-bar CSV (regenerating it via getData if it is
    missing), then split it into one DataFrame per trading day.

    Returns:
        list[pandas.DataFrame]: per-day slices of the full series.
        (The original silently discarded this result; returning it makes the
        function usable and is backward-compatible for callers ignoring it.)
    """
    try:
        data = pd.read_csv("rb1901.csv", index_col=0)
        data.index = pd.to_datetime(data.index)
    except (IOError, OSError):
        # CSV missing/unreadable: build it once, then retry the load.
        # (Narrowed from a bare `except:` that also hid KeyboardInterrupt
        # and genuine parse bugs.)
        import getData
        getData.main()
        data = pd.read_csv("rb1901.csv", index_col=0)
        data.index = pd.to_datetime(data.index)
    # days_number holds cumulative row offsets marking day boundaries.
    days_number = np.load("days_number.npy")
    da = [
        data.iloc[days_number[i]:days_number[i + 1]]
        for i in range(len(days_number) - 1)  # range: works on Py2 and Py3
    ]
    return da
예제 #4
0
def stat():
    """Render the stat report page for the date range posted in the form.

    Filters the manager data returned by getData.main() down to the
    configured manager/dealinger id lists before rendering.
    """
    form = UserDate(request.form)
    requested_date_from = form.input_date_from._value()
    requested_date_to = form.input_date_to._value()

    all_managers = getData.main(requested_date_from, requested_date_to)

    # `num in dict` replaces Python-2-only dict.has_key() (removed in Py3).
    visible_managers = {num: all_managers[num]
                        for num in managers_num if num in all_managers}
    visible_dealingers = {num: all_managers[num]
                          for num in dealingers_num if num in all_managers}

    return render_template("stat.html",
        work_hours = work_hours,
        all_hours = all_hours,
        today = requested_date_from,
        title = 'Get Report',
        form = form,
        dealingers = visible_dealingers,
        managers_names = managers_names,
        visible_managers = visible_managers,
        managers = all_managers)
def main():
    """Standardize the raw packet log into (data, labels) and dump both to
    text files; returns the data, the labels, and the message size used."""
    messageSize = 32
    mostFrequentWords = 9999

    fullLog, wordFreq = getData.main()
    # Map the most frequent words to indices, then encode every packet.
    dictionary = convertFreqTable(wordFreq, mostFrequentWords)
    data, labels = standardizePackets(fullLog, messageSize, dictionary)

    printToFile(data, 'data.txt')
    printToFile(labels, 'labels.txt')
    return data, labels, messageSize
예제 #6
0
def main(careerData=None,seasonStats=None):
    """
    This is the main module.  This can be run without any inputs, in which case
    the careerData and seasonStats variables will be created using modules
    in the getData.py file.  Once all the features are created they are saved
    as pickle files.

    Inputs:
        careerData - dictionary or open pickle file, optional (default=None)

            The dictionary containing all relevant career data.  If the data
            has not been collected previously, it is collected with the
            default input.  Either a dictionary or an open pickle file is
            acceptable.

        seasonStats - dictionary or open pickle file, optional (default=None)

            The dictionary containing end-of-season stats for all relevant
            years.  Same dict-or-pickle-file convention as careerData.

    Outputs:
        nonRookieData - dictionary keyed by season -> Non-Rookie features
        rookieData - dictionary keyed by season -> Rookie features
        careerData - dictionary; updated if new players were found during
            feature extraction.
    """
    # Accept either an already-loaded dict or an open pickle file: loading a
    # dict raises, and we fall through to using the value as-is.
    # (Narrowed from a bare `except:` that also swallowed KeyboardInterrupt.)
    if careerData:
        try:
            careerData = pickle.load(careerData)
        except Exception:
            pass  # already a dict, not a pickle file
    if seasonStats:
        try:
            seasonStats = pickle.load(seasonStats)
        except Exception:
            pass  # already a dict, not a pickle file
    if not (careerData and seasonStats):
        careerData, seasonStats = getData.main()
    nonRookieData, rookieData, careerData = getTrainData(careerData, seasonStats)
    # `with` guarantees each dump file is flushed and closed (the original
    # leaked the handles from pickle.dump(..., open(..., 'wb'))).
    with open('nonRookieData.p', 'wb') as f:
        pickle.dump(nonRookieData, f)
    with open('rookieData.p', 'wb') as f:
        pickle.dump(rookieData, f)
    with open('careerData.p', 'wb') as f:
        pickle.dump(careerData, f)
    return (nonRookieData, rookieData, careerData)
예제 #7
0
def testMethods(nonRookieData=None,rookieData=None,careerData=None):
    """The test suite for deciding the best model.

    Any input left as None is regenerated through the getData/getFeatures
    pipeline, then cross-validation is run separately for each player group.
    """
    if not careerData:
        seasonStats, careerData, lookUp = getData.main()
    if not (nonRookieData and rookieData):
        nonRookieData, rookieData, careerData = getFeatures.main(careerData)

    # Cross-validate veterans and rookies with their dedicated routines.
    resultsNonRookies, predsNonRookies = getCrossVal(nonRookieData, careerData)
    resultsRookies, predsRookies = getRookieCrossVal(rookieData, careerData)

    resultsNonRookies = writeResToPandas(resultsNonRookies, 'nonRookies')
    resultsRookies = writeResToPandas(resultsRookies, 'rookies')
    return (resultsNonRookies, resultsRookies, predsNonRookies, predsRookies)
예제 #8
0
def predictNextSeason(year=2015,nonRookieData='nonRookieData.p',rookieData='rookieData.p',careerData='careerData.p',seasonStats='seasonStats.p'):
    """This module uses previously selected algorithms:
            -Veterans: Random Forest; n_estimators=500, min_samples_split=125
            -Novices: SVM; C=.15, gamma = .015, epsilon= .05
        It then trains the models and generates predictions in csv format

        Inputs:
            year - int, optional (default=2015)

            nonRookieData,rookieData,careerData,seasonStats - pickle filename
                or dictionary, optional (defaults are the saved pickle names)

                If falsy, the variable is regenerated via getData/getFeatures

        Outputs:
            predictionNonRookies,predicitonRookies - Pandas Dataframe

                The dataframes containing the predictions for both groups
    """
    t0=time.time()  # wall-clock start; used by the progress prints below
    last2digits=str(year)[-2:]
    # Season label, e.g. year=2015 -> "2014-15" (previous year + last 2 digits).
    season = str((datetime.datetime(year,1,1)-datetime.timedelta(days=365)).year)+'-'+last2digits
    # Regenerate any missing inputs from the upstream pipeline.
    if not (careerData and seasonStats):
        seasonStats,careerData,lookUp = getData.main()
    if not (nonRookieData and rookieData):
        nonRookieData,rookieData,careerData=getFeatures.main(careerData)
    # tryPickle resolves filenames/dicts into loaded data structures.
    nonRookieData,rookieData,careerData,seasonStats=tryPickle(nonRookieData,rookieData,careerData,seasonStats)
    print('All past data found! Now fitting models ',time.time()-t0)
    # Fit one model (plus its training set and scaler) per player group.
    nonRookiesModel,nonRookiesTrain,nonRookiesScaler = getModel(nonRookieData,'nonRookies')
    rookiesModel,rookiesTrain,rookiesScaler = getModel(rookieData,'rookies')
    print('Models fitted! Now getting all current players features ',time.time()-t0)
    nonRookies,rookies = findPlayerFeatures(year,seasonStats[year],careerData,
        nonRookiesTrain,rookiesTrain,nonRookiesScaler,rookiesScaler)
    print('Features found! Now making predictions ',time.time()-t0)
    predictionsNonRookies = getPredictions(nonRookies,nonRookiesModel,'nonRookies')
    print('Non-Rookie Predictions made! Now predicting Rookies ',time.time()-t0)
##    predictionsNonRookies.to_csv(season+'_Veteran_Predictions.csv',index=False)
    predictionsRookies = getPredictions(rookies,rookiesModel,'rookies')
##    predictionsRookies.to_csv(season+'_Novice_Predictions.csv',index=False)
    # Combined CSV of both groups; per-group exports above are disabled.
    predictionsNonRookies.append(predictionsRookies).to_csv(season+'_Predictions.csv',index=False)
    print('Total Runtime is ',time.time()-t0,'s')
    return(predictionsNonRookies,predictionsRookies)
예제 #9
0
def test_production_analysis_with_energies():
    """Smoke-test the production analysis with the 'energies' option on."""
    overrides = default_args()
    overrides['energies'] = 'Yes'
    getData.main(overrides)
예제 #10
0
def test_production_analysis():
    """Smoke-test the production analysis with the default argument set."""
    args = default_args()
    getData.main(args)
예제 #11
0
def main(runPams):
    """Train/test a CNN on three views of the same data (raw samples,
    featureMap, optFeatureMap), then write the results and per-view
    true/predicted labels to a timestamped Excel file.

    Parameters:
        runPams: run-parameter object; this code reads .lr, .minusMean,
            .xn, .stdBias, .sampleNum, .numThreshold.
    Returns:
        (): empty tuple; the real output is the Excel file and a printed
        comma-separated result line.
    """
    # Unix-epoch seconds as the unique Excel filename.
    timeStam = str(int(time.time()))
    #saveExcelPath = "C:\\Users\\pdang\\Desktop\\" + timeStam + ".xlsx"
    # NOTE(review): cluster-specific hard-coded output path.
    saveExcelPath = "/N/project/zhangclab/pengtao/myProjectsDataRes/20200113Predicte/results/l1NormCRNumCNN_small/block1/excelRes/" + timeStam + ".xlsx"
    #st = time.time()
    # get samples, featureMap, optFeatureMap
    olabel, samples, featureMap, optFeatureMap = getData.main(runPams)
    '''
    [print(olabel[i], samples[i]) for i in range(len(samples))]
    print("------------------------------------------------------")
    [print(olabel[i], featureMap[i]) for i in range(len(featureMap))]
    print("--------------------------------------------------")
    [print(olabel[i], optFeatureMap[i]) for i in range(len(optFeatureMap))]
    print("---------------------------------------------------------------")
    print(olabel.size())
    print(samples.size())
    print(featureMap.size())
    print(optFeatureMap.size())
    sys.exit()
    '''

    # choose cpu or gpu automatically
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # samples data: fresh net/optimizer/loss sized to this view's dims
    net, optimizer, lossFunc = getCNNPams(samples.size()[1],
                                          samples.size()[2],
                                          samples.size()[3], device,
                                          runPams.lr)
    sres, sytrue_ypred = Predicte.myUtils.myTrainTest.train_test(
        olabel, samples, net, device, optimizer, lossFunc, runPams)

    # featureMap data
    net, optimizer, lossFunc = getCNNPams(featureMap.size()[1],
                                          featureMap.size()[2],
                                          featureMap.size()[3], device,
                                          runPams.lr)
    fres, fytrue_ypred = Predicte.myUtils.myTrainTest.train_test(
        olabel, featureMap, net, device, optimizer, lossFunc, runPams)
    # optFeatureMap data
    net, optimizer, lossFunc = getCNNPams(optFeatureMap.size()[1],
                                          optFeatureMap.size()[2],
                                          optFeatureMap.size()[3], device,
                                          runPams.lr)
    ores, oytrue_ypred = Predicte.myUtils.myTrainTest.train_test(
        olabel, optFeatureMap, net, device, optimizer, lossFunc, runPams)
    # prepare results: one descriptive row per run parameter, then the
    # accuracy/result value for each of the three data views
    res = list()
    if runPams.minusMean == 1:
        res.append("c*r-E")
    else:
        res.append("c*r")
    res.append(runPams.xn)
    res.append("N(0-" + str(runPams.stdBias / 10) + ")")
    res.append("10*" + str(runPams.sampleNum))
    res.append(runPams.numThreshold)
    res.append("7*" + str(samples.size()[2]))
    res.append(sres)
    res.append("7*" + str(featureMap.size()[3]))
    res.append(fres)
    res.append("7*" + str(optFeatureMap.size()[3]))
    res.append(ores)
    # save data to excel
    resDF = pd.DataFrame(res)
    resDF.columns = ["res"]
    sytrue_ypred = pd.DataFrame(sytrue_ypred)
    sytrue_ypred.columns = ["true", "pred"]
    cytrue_ypred = pd.DataFrame(fytrue_ypred)
    cytrue_ypred.columns = ["true", "pred"]
    oytrue_ypred = pd.DataFrame(oytrue_ypred)
    oytrue_ypred.columns = ["true", "pred"]

    writer = pd.ExcelWriter(saveExcelPath)  # write to the Excel file
    resDF.to_excel(writer, index=False)
    # each true/pred pair lands in its own column group of the same sheet
    sytrue_ypred.to_excel(writer, startcol=2, index=False)
    cytrue_ypred.to_excel(writer, startcol=5, index=False)
    oytrue_ypred.to_excel(writer, startcol=8, index=False)
    writer.save()
    writer.close()
    # output data
    res = ','.join(str(i) for i in res)
    print(res)
    return ()
예제 #12
0
import statsmodels.api as sm
import statsmodels.formula.api as smf
import seaborn as sns
#import getData

__all__ = [
    "data", "futurepng", "days_number", "get_vwap", "vwap", "last",
    "rejectData", "getData", "get_vabp", "get_mabp"
]

# Load the cached rb1901 CSV; if it is missing or unreadable, generate it
# once via getData and retry.  (Narrowed from a bare `except:` that also
# swallowed KeyboardInterrupt and real bugs.)
try:
    data = pd.read_csv("rb1901.csv", index_col=0)
    data.index = pd.to_datetime(data.index)
except (IOError, OSError):
    import getData
    getData.main()
    data = pd.read_csv("rb1901.csv", index_col=0)
    data.index = pd.to_datetime(data.index)

# Cumulative row offsets marking trading-day boundaries.
days_number = np.load("days_number.npy")
# One DataFrame per trading day; range (not Py2-only xrange) works on
# both Python 2 and 3.
da = [
    data.iloc[days_number[i]:days_number[i + 1]]
    for i in range(len(days_number) - 1)
]


def get_data():
    try:
        data = pd.read_csv("rb1901.csv", index_col=0)
        data.index = pd.to_datetime(data.index)
    except:
예제 #13
0
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import getData


# Exploratory visualizations of the dataset returned by the project loader.
dataset = getData.main()

# box and whisker plots -- one subplot per column (the 2x2 layout suggests
# 4 feature columns; TODO confirm against getData.main())
dataset.plot(kind='box', subplots=True, layout=(2,2), sharex=False, sharey=False)
plt.show()




# histograms of each column's distribution
dataset.hist()
plt.show()


# scatter plot matrix of pairwise feature relationships
scatter_matrix(dataset)