def __processInstrument__(self, instrument, ratioList, fromDate, toDate,
                          wrongInstrumentsSymbolLists, outputDict):
    """Process one instrument, splitting the ratio list across threads if enabled.

    Args:
        instrument: Object exposing ``symbol`` and ``currency`` (used for
            logging) that is forwarded to the per-instrument worker.
        ratioList: Ratios to process; also the collection that gets split
            between workers on the parallel path.
        fromDate: Start of the requested range, forwarded unchanged.
        toDate: End of the requested range, forwarded unchanged.
        wrongInstrumentsSymbolLists: Collector forwarded unchanged to the
            worker.
        outputDict: Accumulator forwarded to the worker.

    Returns:
        Whatever ``mpPandasObj`` (parallel path) or
        ``__processInstrumentToVectorize__`` (serial path) returns.
    """
    logger.debug('%s_%s processing to vectorize ' %
                 (instrument.symbol, instrument.currency))

    # Both levels of parallelism cannot be active together: when instruments
    # are already parallelised, ratio parallelism is switched off on the
    # instance (the change persists for later calls).
    if (self.parallelDownloadInstruments is True
            and self.parallelDownloadRatios is True):
        self.parallelDownloadRatios = False

    # Only worth fanning out when there are more ratios than threads.
    useThreads = self.parallelDownloadRatios and len(ratioList) > self.threads
    if not useThreads:
        return self.__processInstrumentToVectorize__(
            instrument, ratioList, fromDate, toDate, outputDict,
            wrongInstrumentsSymbolLists)

    logger.info('__processInstrument__ parallel into %d threads ' %
                self.threads)
    return mpPandasObj(
        func=self.__processInstrumentToVectorize__,
        pdObj=('ratioList', ratioList),
        numThreads=self.threads,
        mpBatches=1,
        isVerticalParallel=True,
        instrument=instrument,
        fromDate=fromDate,
        toDate=toDate,
        outputDict=outputDict,
        wrongInstrumentsSymbolLists=wrongInstrumentsSymbolLists,
    )
Exemple #2
0
 def getFactorData(self, instrumentList, ratioList, fromDate, toDate=None):
     """Log the request and delegate it to ``self.factor_service``.

     Args:
         instrumentList: Instruments to request, forwarded unchanged.
         ratioList: Ratios to request, forwarded unchanged.
         fromDate: Start of the requested range.
         toDate: End of the requested range; ``None`` is forwarded as is.

     Returns:
         The result of ``factor_service.getDataDictOfMatrixAlphalens``.
     """
     requestSummary = (
         "Request factor data from db for\ninstrumentList: %s   \nratioList:%s \nfrom %s to %s"
         % (instrumentList, ratioList, fromDate, toDate))
     logger.info(requestSummary)

     factorService = self.factor_service
     return factorService.getDataDictOfMatrixAlphalens(
         instrumentList=instrumentList,
         ratioList=ratioList,
         fromDate=fromDate,
         toDate=toDate,
     )
Exemple #3
0
    def __getAllInstrumentData__(self, instrumentList, ratioList, fromDate,
                                 toDate):
        """Download data for every instrument and return the requested matrices.

        The work is dispatched through ``mpPandasObj`` when more than one
        thread is configured, otherwise ``__getAllInstrumentSerial__`` is
        called directly. Every ``DataFrame`` in the result is forward-filled
        and remaining gaps (e.g. the first row) are set to 0.

        Args:
            instrumentList: Instruments to download.
            ratioList: Extra columns requested on top of ``DataDictKeys.keys``.
            fromDate: Start of the requested range.
            toDate: End of the requested range.

        Returns:
            A dict keyed by each entry of ``DataDictKeys.keys + ratioList``,
            or ``None`` when the download step returned ``None``.

        Raises:
            KeyError: If a requested column is missing from the downloaded
                data.
        """
        wrongInstrumentsSymbolLists = []
        columnList = DataDictKeys.keys + ratioList
        outputDict = {}

        if len(instrumentList) < self.threads:
            # NOTE(review): this lowers ``self.threads`` on the instance, so
            # the reduced value is also seen by later calls. Kept as is
            # because other code may rely on it -- confirm.
            self.threads = int(np.ceil(len(instrumentList) / 2))
            logger.debug('Modified threads of __getAllInstrumentData__ to %d' %
                         self.threads)

        if self.threads > 1:
            mpBatches = 1
            if self.threads > 3:
                instrumentsPerThread = (float(len(instrumentList)) /
                                        float(self.threads))
                # Clamp the batch count to the range [1, 50].
                mpBatches = max(1, min(int(instrumentsPerThread / 5), 50))

            logger.debug('mpBatches of __getAllInstrumentData__ to %d' %
                         mpBatches)

            outputDict = mpPandasObj(
                func=self.__getAllInstrumentSerial__,
                pdObj=('instrumentList', instrumentList),
                numThreads=self.threads,
                mpBatches=mpBatches,
                isVerticalParallel=True,
                linMols=True,
                columnList=columnList,
                fromDate=fromDate,
                toDate=toDate,
                outputDict=outputDict,
                wrongInstrumentsSymbolLists=wrongInstrumentsSymbolLists,
            )
        else:
            logger.info('Downloading data serialized')
            outputDict = self.__getAllInstrumentSerial__(
                instrumentList, columnList, fromDate, toDate, outputDict,
                wrongInstrumentsSymbolLists)

        # The None check has to happen before the result is iterated;
        # previously it came after ``outputDict.keys()`` and could never
        # be reached with a None result.
        if outputDict is None:
            return None

        for frame in outputDict.values():
            if isinstance(frame, pd.DataFrame):
                # ``ffill`` replaces the deprecated ``fillna(method='ffill')``.
                frame.ffill(inplace=True)
                frame.fillna(0, inplace=True)  # 1st row has nothing to ffill

        # Filter down to the columns that were actually asked for.
        return {ratioAsked: outputDict[ratioAsked] for ratioAsked in columnList}
Exemple #4
0
    def __init__(self, user_settings):
        """Initialise the base class and index the CSV files in the input path.

        Args:
            user_settings: Settings object passed to the base initialiser and
                to ``getDukascopyInputPath``.
        """
        HistoricalMarketData.__init__(self, user_settings)

        self.inputPath = getDukascopyInputPath(user_settings)
        # Same pattern string as inputPath + os.sep + "*.csv".
        csvPattern = os.sep.join((self.inputPath, "*.csv"))
        self.filesInDirectory = glob.glob(csvPattern)

        detectedCount = len(self.filesInDirectory)
        logger.info("dukascopy detected %i files can be processed" %
                    detectedCount)
Exemple #5
0
        def historical_data_handler(self, msg):
            """Handle one historical-data message.

            Counts the message, and either stores it through
            ``self.appendData`` or, when the string form of ``msg.date``
            contains ``'finished'``, sets ``self.receivedAllHistorical``.

            Args:
                msg: Message object exposing a ``date`` attribute.
            """
            logger.debug("IB historical received %s" % msg)

            isFirstMessage = self.messageCounter == 0
            if isFirstMessage:
                logger.info("IB historical started! %s" % msg.date)
            self.messageCounter += 1

            # Ordinary messages are stored; only the end marker falls through.
            if 'finished' not in str(msg.date):
                self.appendData(msg)
                return

            logger.info("IB historical finished! %s" % msg.date)
            self.receivedAllHistorical = True
    def getFundamentalDataProvider(self, ratio, instrument):
        """Return the fundamental-data provider configured for the instrument.

        The provider is looked up by ``instrument.asset_type`` in
        ``self.user_settings.asset_type_to_fundamental_data``. When that
        mapping is missing, unusable or has no entry for the asset type,
        ``QuandlFundamentalData`` is returned.

        Args:
            ratio: Unused; kept for interface compatibility.
            instrument: Object exposing ``asset_type``.

        Returns:
            The configured provider, or ``QuandlFundamentalData`` by default.
        """
        asset_requested = instrument.asset_type
        try:
            return self.user_settings.asset_type_to_fundamental_data[
                asset_requested]
        # Only configuration lookup failures trigger the default; a bare
        # ``except`` would also swallow KeyboardInterrupt/SystemExit.
        except (AttributeError, LookupError, TypeError):
            logger.info(
                'No provided fundamental data provider in user_settings as dict asset_type_to_fundamental_data=> default'
            )
            return QuandlFundamentalData
Exemple #7
0
 def getHistoricalData(self, instrument, period, number_of_periods, fromDate,
                       toDate=None, bar_type=BarType.time_bar):
     """Log the request and delegate it to the historical market data service.

     Args:
         instrument: Object exposing ``symbol`` and ``currency``.
         period: Bar period, forwarded unchanged.
         number_of_periods: Number of periods per bar, forwarded unchanged.
         fromDate: Start of the requested range.
         toDate: End of the requested range; ``None`` is forwarded as is.
         bar_type: Bar type, defaults to ``BarType.time_bar``.

     Returns:
         The result of ``historical_market_data_service.getHistoricalData``.
     """
     # Original author's note: "Need to be on pystore".
     requestSummary = (
         "Request historical data from db for %s_%s from %s to %s" %
         (instrument.symbol, instrument.currency, fromDate, toDate))
     logger.info(requestSummary)

     marketDataService = self.historical_market_data_service
     return marketDataService.getHistoricalData(
         instrument, period, number_of_periods, fromDate, toDate, bar_type)
    def getBroker(self, instrument):
        """Return the broker connector configured for the instrument.

        The connector is looked up by ``instrument.asset_type`` in
        ``self.user_settings.asset_type_to_broker``. When that mapping is
        missing, unusable or has no entry for the asset type, the default is
        ``GdaxConnector`` for ``AssetType.crypto`` and ``EmailConnector`` for
        everything else.

        Args:
            instrument: Object exposing ``asset_type``.

        Returns:
            The configured connector, or the default for the asset type.
        """
        asset_requested = instrument.asset_type
        try:
            return self.user_settings.asset_type_to_broker[asset_requested]
        # Only configuration lookup failures trigger the default; a bare
        # ``except`` would also swallow KeyboardInterrupt/SystemExit.
        except (AttributeError, LookupError, TypeError):
            logger.info(
                'No provided broker in user_settings as dict asset_type_to_broker=> default'
            )

        if asset_requested == AssetType.crypto:
            return GdaxConnector
        return EmailConnector
    def getHistoricalMarketDataProvider(self, instrument):
        """Return the historical market data provider for the instrument.

        The provider is looked up by ``instrument.asset_type`` in
        ``self.user_settings.asset_type_to_historical_market_data``. When
        that mapping is missing, unusable or has no entry for the asset type,
        the default is ``CryptoCompareHistoricalMarketData`` for
        ``AssetType.crypto`` and ``YahooHistoricalMarketData`` otherwise.

        Args:
            instrument: Object exposing ``asset_type``.

        Returns:
            The configured provider, or the default for the asset type.
        """
        asset_requested = instrument.asset_type
        try:
            return self.user_settings.asset_type_to_historical_market_data[
                asset_requested]
        # Only configuration lookup failures trigger the default; a bare
        # ``except`` would also swallow KeyboardInterrupt/SystemExit.
        except (AttributeError, LookupError, TypeError):
            logger.info(
                'No provided historical market in user_settings as dict asset_type_to_historical_market_data=> default'
            )

        if asset_requested == AssetType.crypto:
            return CryptoCompareHistoricalMarketData
        return YahooHistoricalMarketData
    def getDataDictOfMatrix(
        self,
        instrumentList,
        ratioList,
        fromDate,
        toDate=None,
        persistTempFile=None,
    ):
        """Return the dict of data matrices, going through the cache if enabled.

        Both dates are truncated to midnight before use. With
        ``self.useFunctionTemp`` set, ``__getDataDictOfMatrix__`` is wrapped
        by ``self.cacher.cache`` and the ratio list is replaced by the result
        of ``getLongestRatioListDownloaded``; otherwise the method is called
        directly. The ratio list that was used is recorded through
        ``__save_ratio_list__`` in both cases.

        Args:
            instrumentList: Instruments to load.
            ratioList: Ratios to load.
            fromDate: Start date; only year, month and day are used.
            toDate: End date; defaults to today. Only year, month and day
                are used.
            persistTempFile: Optional Excel file name to write the result to.
                ``None`` or ``False`` disables persisting; ``.xlsx`` is
                appended when the name does not already end with it.

        Returns:
            The dict produced by ``__getDataDictOfMatrix__``.
        """
        # ``pd.datetime`` was only an alias of ``datetime.datetime`` and is
        # gone from pandas >= 2.0, so the stdlib class is used directly.
        import datetime

        fromDate = datetime.datetime(fromDate.year, fromDate.month,
                                     fromDate.day)
        if toDate is None:
            toDate = datetime.datetime.today()
        toDate = datetime.datetime(toDate.year, toDate.month, toDate.day)

        if self.useFunctionTemp:
            logger.debug('getDataDictOfMatrix downloading/loading...')
            logger.debug('instrumentList:  %s' % instrumentList)
            logger.debug('ratioList:  %s' % ratioList)
            logger.debug('fromDate:  %s' % fromDate)
            logger.debug('toDate:  %s' % toDate)
            logger.debug('persistTempFile:  %s' % persistTempFile)

            functionTemp = self.cacher.cache(self.__getDataDictOfMatrix__,
                                             ignore=['self'])
            ratioList = self.getLongestRatioListDownloaded(
                instrumentList, ratioList, fromDate, toDate)
            outputDictFinal = functionTemp(
                __unstackInstrumentList__(instrumentList), ratioList, fromDate,
                toDate, None)
        else:
            logger.info('Not using cache function!')
            outputDictFinal = self.__getDataDictOfMatrix__(
                __unstackInstrumentList__(instrumentList), ratioList, fromDate,
                toDate, None)

        # compatibility: False means "do not persist", same as None
        if persistTempFile is False:
            persistTempFile = None

        if persistTempFile is not None:
            logger.debug('Persisting ')
            # ``~`` on a bool is bitwise (-1 / -2, both truthy), which made
            # the extension get appended unconditionally; ``not`` is intended.
            if not persistTempFile.endswith('.xlsx'):
                persistTempFile += '.xlsx'
            self.__createTempExcelFile__(outputDictFinal,
                                         filenameWithExtension=persistTempFile)
        self.__save_ratio_list__(instrumentList, ratioList, fromDate, toDate)
        return outputDictFinal
Exemple #11
0
    def download(self,
                 instrument,
                 period,
                 number_of_periods,
                 fromDate,
                 toDate=None):
        """Download candles from Oanda and return them formatted.

        Args:
            instrument: Object exposing ``symbol`` and ``currency``; they are
                joined as ``SYMBOL_CURRENCY`` for the request.
            period: Key into ``self.period_dict``.
            number_of_periods: Appended to the granularity code for every
                period other than ``Period.day``.
            fromDate: Start of the range, formatted with ``self.formatDate``.
            toDate: End of the range; defaults to the current local time.

        Returns:
            The result of ``self.formatHistorical`` on the downloaded data,
            or ``None`` when the request raised.
        """
        import datetime
        logger.debug("Downloading %s" % instrument)

        oandaInstrument = '%s_%s' % (instrument.symbol, instrument.currency)

        # Daily bars use the bare code; other periods get the count appended.
        isDaily = period == Period.day
        granularityCode = self.period_dict[period]
        if isDaily:
            oandaGranularity = granularityCode
        else:
            oandaGranularity = '%s%i' % (granularityCode, number_of_periods)

        # Expected timestamp shape, e.g. 2014-07-03T04:00:00.000000Z
        startDate = fromDate.strftime(self.formatDate)
        if toDate is None:
            toDate = datetime.datetime.today()
        endDate = toDate.strftime(self.formatDate)

        try:
            requestArgs = dict(
                instrument=oandaInstrument,
                granularity=oandaGranularity,
                start=startDate,
                end=endDate,
                candleFormat=self.candleFormat,
                dailyAlignment=self.dailyAlignment,
                alignmentTimezone=self.alignmentTimezone,
                weeklyAlignment=self.weeklyAlignment,
            )
            data_downloaded = self.oanda.get_history(**requestArgs)
        except Exception as e:
            # Best effort: a failed download is reported and yields None.
            logger.error("Cant download from oanda %s %s=> return None   %s" %
                         (instrument.symbol, period, e))
            return None

        logger.info("formatting oanda data for %s" % oandaInstrument)
        return self.formatHistorical(data_downloaded, period=period)
Exemple #12
0
    def getDataDictOfMatrix(self,
                            instrumentList,
                            ratioList,
                            fromDate,
                            toDate=None,
                            persistTempFile=None):
        """Download, clean and align the data matrices for the instruments.

        Both dates are truncated to midnight and passed through
        ``convert_date``. The result is also saved as
        ``dictOfMatrix_last.pickle`` under ``getTempPath(self.user_settings)``.

        Args:
            instrumentList: Instruments to load; the asset type of the first
                one is passed to the cleaning step.
            ratioList: Ratios to load.
            fromDate: Start date; only year, month and day are used.
            toDate: End date; defaults to today. Only year, month and day
                are used.
            persistTempFile: Forwarded to ``__cleanData__``.

        Returns:
            The cleaned and aligned dict of matrices.

        Raises:
            IndexError: If ``instrumentList`` is empty.
        """
        # ``pd.datetime`` was only an alias of ``datetime.datetime`` and is
        # gone from pandas >= 2.0, so the stdlib class is used directly.
        import datetime
        import os

        start = time.time()
        fromDate = datetime.datetime(fromDate.year, fromDate.month,
                                     fromDate.day)
        if toDate is None:
            toDate = datetime.datetime.today()
        toDate = datetime.datetime(toDate.year, toDate.month, toDate.day)
        fromDate = convert_date(fromDate)
        toDate = convert_date(toDate)

        logger.debug('getDataDictOfMatrix downloading/loading...')
        logger.debug('instrumentList:  %s' % instrumentList)
        logger.debug('ratioList:  %s' % ratioList)
        logger.debug('fromDate:  %s' % fromDate)
        logger.debug('toDate:  %s' % toDate)
        logger.debug('persistTempFile:  %s' % persistTempFile)

        outputDict = self.__getAllInstrumentData__(instrumentList, ratioList,
                                                   fromDate, toDate)

        assetType = instrumentList[0].asset_type
        outputDictFinal = self.__cleanData__(outputDict,
                                             assetType=assetType,
                                             persistTempFile=persistTempFile)

        # The timing covers download and cleaning, not the alignment below.
        end = time.time()
        logger.info('******')
        minutesTime = (end - start) / 60
        logger.info('Took %f minutes to finish __getDataDictOfMatrix__' %
                    (minutesTime))
        logger.info('******')

        outputDictFinal = self.__allignSymbolsDictMatrix__(outputDictFinal)
        outputDictFinal = self.__cleanSymbolsDictMatrix__(outputDictFinal)
        logger.info('Finished => saving dictOfMatrix_last.pickle')
        save_to_file(
            outputDictFinal,
            getTempPath(self.user_settings) + os.sep +
            'dictOfMatrix_last.pickle')
        return outputDictFinal
Exemple #13
0
    def __makeRequestSingle__(self, contract, durationStr, toDateString,
                              barSizeSetting):
        """Issue one historical-data request and block until it completes.

        Args:
            contract: Contract to request; ``m_symbol`` is used for logging.
            durationStr: Duration argument forwarded to the request.
            toDateString: End date/time argument forwarded to the request.
            barSizeSetting: Bar size argument forwarded to the request.

        Returns:
            The result of ``self.__formatHistorical__`` on the received
            dataframe, or ``None`` when no dataframe was received.
        """
        self.receivedDataObject.reset()

        requestId = self.tickId
        self.ib_object.reqHistoricalData(requestId, contract, toDateString,
                                         durationStr, barSizeSetting,
                                         self.whatToShow, 1,
                                         self.ib_formatDate_return)
        self.tickId += 1

        logger.info("send req historical %s : waiting" % contract.m_symbol)

        # Poll every 3 seconds until the receiver flags completion.
        # NOTE(review): there is no timeout, so this waits forever if the
        # flag is never set.
        while True:
            if self.receivedDataObject.receivedAllHistorical is not False:
                break
            sleep(3)

        logger.debug("finished single request %s " % contract.m_symbol)
        receivedFrame = self.receivedDataObject.getDataframe()
        if receivedFrame is None:
            logger.error("Some error appears on single request !! check it")
            return None

        # Original author was unsure whether this formatting is necessary.
        return self.__formatHistorical__(receivedFrame)
    def __getDataDictOfMatrix__(self,
                                instrumentStringStacked,
                                ratioList,
                                fromDate,
                                toDate=None,
                                persistTempFile=None):
        """Download all instruments and return the cleaned dict of matrices.

        Args:
            instrumentStringStacked: Indexable with three components that are
                passed to ``__stackInstrumentList__`` to rebuild the
                instrument list.
            ratioList: Ratios to download; ``None`` is treated as empty.
            fromDate: Start of the range, passed through ``convert_date``.
            toDate: End of the range, passed through ``convert_date``.
            persistTempFile: Optional Excel file name to write the result to.
                ``None`` or ``False`` disables persisting; ``.xlsx`` is
                appended when the name does not already end with it.

        Returns:
            The dict of matrices after de-duplication, date trimming and the
            ``__cleanOutputDict__`` / ``__cleanBankHolidays__`` steps.

        Raises:
            IndexError: If the rebuilt instrument list is empty.
        """
        logger.debug('__getDataDictOfMatrix__ downloading...')
        instrumentList = __stackInstrumentList__(instrumentStringStacked[0],
                                                 instrumentStringStacked[1],
                                                 instrumentStringStacked[2])
        if ratioList is None:
            ratioList = []
        # compatibility: False means "do not persist", same as None
        if persistTempFile is False:
            persistTempFile = None

        fromDate = convert_date(fromDate)
        toDate = convert_date(toDate)
        assetType = instrumentList[0].asset_type

        wrongInstrumentsSymbolLists = []
        start = time.time()
        if self.parallelDownloadInstruments and len(
                instrumentList) > self.threads:
            # The batch count is pinned to 1; the size-based value that used
            # to be computed here was always overwritten before use.
            mpBatches = 1
            logger.info(
                'Downloading Data using parallel[%d threads ,%d mpBatches] __processAllInstruments__ of %d instruments  and %d ratios'
                %
                (self.threads, mpBatches, len(instrumentList), len(ratioList)))

            outputDict = mpPandasObj(
                func=self.__processAllInstruments__,
                pdObj=('instrumentList', instrumentList),
                isVerticalParallel=True,
                numThreads=self.threads,
                mpBatches=mpBatches,
                linMols=True,
                ratioList=ratioList,
                fromDate=fromDate,
                toDate=toDate,
                wrongInstrumentsSymbolLists=wrongInstrumentsSymbolLists,
            )
            typeDownload = 'parallel'
        else:
            logger.info(
                'Downloading Data using serial __processAllInstruments__ of %d instruments  and %d ratios'
                % (len(instrumentList), len(ratioList)))
            outputDict = self.__processAllInstruments__(
                instrumentList, ratioList, fromDate, toDate,
                wrongInstrumentsSymbolLists)
            typeDownload = 'serial'
        end = time.time()
        logger.info('******')
        minutesTime = (end - start) / 60
        logger.info('Took %f minutes to finish %s __getDataDictOfMatrix__' %
                    (minutesTime, typeDownload))
        logger.info('******')

        # Pass 1: drop duplicated index entries (keeping the last) and
        # collect the dates on which every matrix has a non-zero row.
        dateFinal = None
        for key in outputDict:
            if key == 'wrong':
                continue
            frame = outputDict[key]
            frame = frame[~frame.index.duplicated(keep='last')]
            outputDict[key] = frame

            dateIndex = frame[frame.fillna(0).sum(axis=1) != 0].index
            if len(dateIndex) == 0:
                # A matrix with no data at all must not empty the result.
                dateIndex = frame.index

            if dateFinal is None:
                dateFinal = dateIndex
            else:
                dateFinal = dateFinal.intersection(dateIndex)

        # Pass 2: trim every matrix against the common dates, then zero-fill.
        for key in outputDict:
            if key == 'wrong':
                continue
            # NOTE(review): ``searchsorted(...) > 0`` keeps rows strictly
            # after the first common date rather than rows that are members
            # of ``dateFinal``; kept unchanged -- confirm this is intended.
            mask = dateFinal.searchsorted(outputDict[key].index)
            # fillna returns a new frame, avoiding an in-place write on a
            # slice of the original.
            outputDict[key] = outputDict[key][mask > 0].fillna(0)

        logger.debug('all instruments processed => cleaning ')
        # Check for the key before reading it; when it is absent the empty
        # list created above is used.
        if 'wrong' in outputDict:
            wrongInstrumentsSymbolLists = outputDict.pop('wrong').copy()
        wrongInstrumentsSymbolLists = self.__formatWrongInstrumentList__(
            wrongInstrumentsSymbolLists, outputDict)

        outputDictFinal = self.__cleanOutputDict__(
            outputDict, wrongInstrumentsSymbolLists)

        if assetType != AssetType.forex and assetType != AssetType.crypto:
            outputDictFinal = self.__cleanBankHolidays__(outputDictFinal)
        df = outputDictFinal[DataDictKeys.close]
        logger.debug(
            'all dictOfMatrix cleaned => finished %d matrixes of %d columns' %
            (len(outputDictFinal), df.shape[1]))

        if persistTempFile is not None:
            # ``~`` on a bool is bitwise (-1 / -2, both truthy), which made
            # the extension get appended unconditionally; ``not`` is intended.
            if not persistTempFile.endswith('.xlsx'):
                persistTempFile += '.xlsx'
            self.__createTempExcelFile__(outputDictFinal,
                                         filenameWithExtension=persistTempFile)
        return outputDictFinal
    def __sendEmail__(self,
                      recipient,
                      subject,
                      body,
                      html=None,
                      fileToSendArray=None):
        """Send an e-mail with optional HTML part and file attachments.

        The message is sent from ``self.user_settings.email_address`` through
        the SMTP host/port in ``self.user_settings`` using STARTTLS and a
        login with ``email_password``. Up to three delivery attempts are
        made, five seconds apart. Failures are logged and never raised.

        Args:
            recipient: Destination address.
            subject: Subject line.
            body: Plain-text body.
            html: Optional HTML body; attached only when it is a ``str``.
            fileToSendArray: Optional iterable of file paths to attach.
                ``None`` entries and paths that are not existing files are
                skipped.
        """
        import mimetypes
        import smtplib
        import time
        from email import encoders
        from email.mime.audio import MIMEAudio
        from email.mime.base import MIMEBase
        from email.mime.image import MIMEImage
        from email.mime.multipart import MIMEMultipart
        from email.mime.text import MIMEText

        fromEmail = self.user_settings.email_address
        try:
            msg = MIMEMultipart()
            msg['From'] = fromEmail
            msg['To'] = recipient
            msg['Subject'] = subject
            msg.attach(MIMEText(body, 'plain'))
            if html is not None and isinstance(html, str):
                msg.attach(MIMEText(html, 'html'))

            # Attachments
            for fileToSend in fileToSendArray or []:
                if fileToSend is None or not os.path.isfile(fileToSend):
                    continue
                logger.debug('adding file ' + fileToSend)

                ctype, encoding = mimetypes.guess_type(fileToSend)
                if ctype is None or encoding is not None:
                    # Unknown or compressed content is sent as raw bytes.
                    ctype = "application/octet-stream"
                maintype, subtype = ctype.split("/", 1)

                if maintype == "text":
                    # Note: we should handle calculating the charset
                    with open(fileToSend) as fp:
                        attachment = MIMEText(fp.read(), _subtype=subtype)
                elif maintype == "image":
                    with open(fileToSend, "rb") as fp:
                        attachment = MIMEImage(fp.read(), _subtype=subtype)
                elif maintype == "audio":
                    with open(fileToSend, "rb") as fp:
                        attachment = MIMEAudio(fp.read(), _subtype=subtype)
                else:
                    attachment = MIMEBase(maintype, subtype)
                    with open(fileToSend, "rb") as fp:
                        attachment.set_payload(fp.read())
                    encoders.encode_base64(attachment)
                attachment.add_header("Content-Disposition",
                                      "attachment",
                                      filename=fileToSend)
                msg.attach(attachment)

            text = msg.as_string()
            result = False
            for counter in (3, 2, 1):
                try:
                    # The context manager closes the session even when an
                    # attempt fails part-way through.
                    with smtplib.SMTP(
                            self.user_settings.email_smtp_host,
                            self.user_settings.email_smtp_port) as server:
                        server.ehlo()
                        server.starttls()
                        server.ehlo()
                        server.login(fromEmail,
                                     self.user_settings.email_password)
                        server.sendmail(fromEmail, recipient, text)
                        result = True
                    break
                except Exception as e:
                    if result:
                        # The message was accepted; only closing the session
                        # failed, so it must not be sent again.
                        break
                    logger.error("Error: unable to send email retry[%d] :%s" %
                                 (counter, str(e)))
                    if counter > 1:
                        # ``os`` has no ``sleep``; the previous ``os.sleep``
                        # call raised and aborted every retry.
                        time.sleep(5)

            if result:
                logger.info("Successfully sent email")
            else:
                logger.error("Error: unable to send email")

        except Exception:
            # Sending mail is best effort: log with traceback, never raise.
            logger.exception("Error: unable to send email")