Ejemplo n.º 1
0
def deleteDuplicatedPrediction(mode):
    """Delete the predictions that stockMongo reports as duplicated.

    The duplicated entries are grouped by date, and one delete task per date
    is handed to ``parallel.runToAllDone`` (8 processes).

    Args:
        mode: forwarded unchanged to ``stockMongo.findDuplicatedPrediction``
            and to every ``stockMongo.deletePredictionBySymbolAndDate`` task.

    Returns:
        None. When no duplicated prediction is found the function returns
        without scheduling any task.
    """
    duplicatedPredictions = list(stockMongo.findDuplicatedPrediction(mode))
    if not duplicatedPredictions:
        # An empty DataFrame has no 'Date' column, so the groupby below would
        # raise KeyError. Nothing duplicated means nothing to delete.
        return

    # One row per duplicated prediction, built from each item's '_id', e.g.
    #
    #              Date Symbol
    #    0   2017-08-10   SFBC
    #    1   2017-08-10   SVBI
    #    2   2017-08-10  SENEB
    dateSymbol_df = pd.DataFrame([item['_id'] for item in duplicatedPredictions])

    # Map every date to the list of symbols duplicated on that date, e.g.
    #
    #    {
    #    '2017-06-12': ['JIVE'],
    #    '2017-06-15': ['BNCN'],
    #    '2017-06-16': ['CNCO', 'SPAN', 'GNVC'],
    #    ...
    #    }
    dateSymbols = {
        date: list(group_df['Symbol'])
        for date, group_df in dateSymbol_df.groupby('Date')
    }

    # Each task is the argument tuple (mode, symbols, date) for one date.
    tasks = [(mode, symbols, date) for date, symbols in dateSymbols.items()]
    parallel.runToAllDone(stockMongo.deletePredictionBySymbolAndDate, tasks, NUMBER_OF_PROCESSES=8)
Ejemplo n.º 2
0
def deleteDuplicatedQuote(startDate, endDate):
    """Clean up the duplicated quotes found between ``startDate`` and ``endDate``.

    Every entry returned by ``stockMongo.findDuplicatedQuotes`` becomes one
    single-argument task for ``deleteQuoteHasNoNextClose``; the tasks are
    executed through ``runToAllDone``.

    Args:
        startDate: lower bound passed to ``stockMongo.findDuplicatedQuotes``.
        endDate: upper bound passed to ``stockMongo.findDuplicatedQuotes``.
    """
    # Echo the range being processed (the original note says the data is
    # checked month by month -- presumably by the caller; verify there).
    print(startDate, endDate)

    tasks = []
    for duplicated in stockMongo.findDuplicatedQuotes(startDate, endDate):
        tasks.append((duplicated,))

    runToAllDone(deleteQuoteHasNoNextClose, tasks)
Ejemplo n.º 3
0
def isPreviousNextCloseUpdated(date):
    """Run ``checkAndUpdate`` for every active symbol and print the elapsed time.

    Args:
        date: passed to ``checkAndUpdate`` as the ``date`` keyword argument
            for every symbol.

    Returns:
        None. The elapsed wall-clock time, in whole seconds, is printed.
    """
    symbols = stockMongo.findAllActiveSymbols()
    start = dt.datetime.now()

    # Bind the date once so each task only has to carry the symbol.
    func = functools.partial(checkAndUpdate, date=date)

    parallel.runToAllDone(func, [(symbol,) for symbol in symbols], NUMBER_OF_PROCESSES=8)

    elapsed = dt.datetime.now() - start
    # timedelta.seconds is only the seconds *component* and wraps at one day;
    # total_seconds() covers the whole duration. int() keeps the printed value
    # an integer, as before.
    print('time consumed:', int(elapsed.total_seconds()))
Ejemplo n.º 4
0
def checkSymbolVolume():
    """Mark the symbols returned by the low-volume query with status "lowVolume".

    The query is ``stockMongo.findSymbolsVolumesLessThan(startDate, 100000)``
    where ``startDate`` is ``getPassedDate(120)`` formatted as ``YYYY-MM-DD``
    (presumably the date 120 days ago -- confirm against ``getPassedDate``).
    The number of matching symbols and the symbols themselves are printed,
    then ``stockMongo.addSymbolStatus(symbol, "lowVolume")`` is run for each
    one through ``runToAllDone``.

    Returns:
        None. When the query matches nothing, ``0`` is printed and no task is
        scheduled.
    """
    startDate = getPassedDate(120).strftime("%Y-%m-%d")
    lowVolumeDocs = list(stockMongo.findSymbolsVolumesLessThan(startDate, 100000))
    if not lowVolumeDocs:
        # DataFrame([]) has no '_id' column, so indexing it would raise
        # KeyError; report zero matches instead.
        print(0)
        return

    # The symbol is carried in each result's '_id' field.
    symbols = pd.DataFrame(lowVolumeDocs)['_id'].values
    print(len(symbols))
    print(symbols)
    runToAllDone(stockMongo.addSymbolStatus,
                 [(symbol, "lowVolume") for symbol in symbols])
Ejemplo n.º 5
0
def learn(machineLearningMode, sepDate):
    """Run the three learning stages, in order, over every active symbol.

    Stages (each executed through ``runToAllDone`` with one task per symbol):
      1. ``initialMachineLearning(machineLearningMode, None, sepDate, symbol)``
      2. ``saveAccuracy(machineLearningMode, None, sepDate, symbol)``
      3. ``verifyQuote(machineLearningMode, sepDate, None, symbol)``

    Args:
        machineLearningMode: first positional argument of every stage.
        sepDate: end bound for stages 1 and 2 and start bound for stage 3
            (presumably the train/verify split date -- confirm with the
            stage functions).
    """
    activeSymbols = pd.DataFrame(list(stockMongo.findAllActiveSymbols()))
    symbols = activeSymbols['Symbol'].values

    def symbolTasks():
        # A fresh list of one-element argument tuples for each stage.
        return [(symbol,) for symbol in symbols]

    train = functools.partial(initialMachineLearning, machineLearningMode, None, sepDate)
    runToAllDone(train, symbolTasks())

    recordAccuracy = functools.partial(saveAccuracy, machineLearningMode, None, sepDate)
    runToAllDone(recordAccuracy, symbolTasks())

    # Verify the predictions on the remaining data: from sepDate onwards,
    # with no end date.
    verify = functools.partial(verifyQuote, machineLearningMode, sepDate, None)
    runToAllDone(verify, symbolTasks())
    

        
Ejemplo n.º 6
0
def runDailyJob(start='2017-08-28', end='2017-08-28'):
    """Run the daily pipeline for the period ``start``..``end``.

    Steps, in order:
      1. fetch and store quotes for every active symbol (12 processes),
      2. load all downloaded quote files,
      3. wait 60 seconds,
      4. save next-transaction-day data for every quote in the period,
      5. predict and save for every symbol, using ``start`` as the date,
      6. generate the prediction report,
      7. run ``dataHealth.dailyCheck`` for every calendar day in the period.

    Args:
        start: first day of the period as a ``YYYY-MM-DD`` string. The default
            is the value that used to be hard-coded in the body.
        end: last day of the period as a ``YYYY-MM-DD`` string. The default is
            the value that used to be hard-coded in the body.

    Raises:
        ValueError: from ``datetime.strptime`` in the verification step if
            ``start`` or ``end`` is not in ``YYYY-MM-DD`` form.
    """
    print(now(), "daily job is started...")

    symbols = list(findAllActiveSymbols())
    # Bind the period once so each task only has to carry the symbol.
    fetchAndStore = functools.partial(fetchAndStoreQuotes, start=start, end=end)

    runToAllDone(fetchAndStore, [(symbol,) for symbol in symbols], NUMBER_OF_PROCESSES=12)

    print(now(), "quote csv files were all downloaded...")
    loadAllQuoteFiles()

    # NOTE(review): fixed one-minute pause kept from the original; the reason
    # is not visible here (possibly to let the loaded data settle).
    time.sleep(60)

    print(now(), "save next tx data...")
    quotes = stockMongo.findQuotesByPeriod(start, end)
    runToAllDone(saveNextTxDayData, [(quote,) for quote in quotes])

    print(now(), 'starting prediction for next Tx day')
    runToAllDone(predictAndSave, [(symbol, start) for symbol in symbols])

    print(now(), 'generating predict report')
    predictReport()

    # Verify every calendar day of the period.
    firstDay = datetime.datetime.strptime(start, "%Y-%m-%d")
    lastDay = datetime.datetime.strptime(end, "%Y-%m-%d")
    for date in pd.date_range(firstDay, lastDay):
        print('check', date)
        dataHealth.dailyCheck(date.strftime("%Y-%m-%d"))

    print(now(), "daily job is done...")
Ejemplo n.º 7
0
def findAndDeleteDuplicatedQuote():
    """Run ``deleteDuplicatedQuote`` over the ranges from ``getDateRanges``.

    The ranges come from ``getDateRanges('2016-08-20', '2017-04-01', 100)``
    (the meaning of ``100`` is defined by ``getDateRanges`` -- presumably a
    range size; confirm there). Each returned item is used as the argument
    tuple of one task.
    """
    runToAllDone(
        deleteDuplicatedQuote,
        getDateRanges('2016-08-20', '2017-04-01', 100),
    )
Ejemplo n.º 8
0
def loadAllQuoteFiles():
    """Load every quote file found in ``fileUtil.QUOTES_DIR`` with ``loadCsv``.

    Directory entries are kept only when ``isFile`` accepts them; each kept
    entry becomes one single-argument ``loadCsv`` task run through
    ``runToAllDone``.
    """
    # NOTE(review): os.listdir yields bare entry names, not full paths --
    # confirm that isFile and loadCsv resolve them against QUOTES_DIR.
    csvFiles = [
        entry
        for entry in os.listdir(fileUtil.QUOTES_DIR)
        if isFile(entry)
    ]
    tasks = [(csvFile,) for csvFile in csvFiles]
    runToAllDone(loadCsv, tasks)