Code Example #1
File: yearn.py Project: ryan-leung/ml_monorepo
def _getDeltas(filepath, source):
    localDir=config.load_source_config(source)["local_dir"]
    lastFilepath=database.getLastProcessedFile(source)
    if lastFilepath is not None:
        lastFilepath="/".join((os.environ["DATA_DIR"],localDir,lastFilepath))
        
    (lastData,lastArchive,lastStartDate,lastBornMillis)=_parseFile(lastFilepath)
    currentData,currentArchive,currentStartDate,currentBornMillis=_parseFile(filepath)
    assert (lastArchive is None and currentArchive is True) or currentArchive is False
    assert (currentStartDate>=lastStartDate)
    
    #get the data that need to be killed. these are data that were in the previous file, but not in
    #the current. The death time can be the timestamp in any item in currentData, since, except for
    #the very first archive file, all data should have the same timestamp 
    remove={}
    for entry,annDate in lastData.iteritems():
        if annDate<currentStartDate: #skip entries announced before the current file's start date
            continue
        if entry not in currentData:
            remove[entry]=currentBornMillis
            
    #get the data that need to be inserted. similar to above
    insert={}
    for entry,annDate in currentData.iteritems():
        if entry not in lastData:
            insert[entry]=currentBornMillis

    return insert,remove
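
The heart of this example is a two-way dictionary comparison: anything present only in the previous snapshot is scheduled for removal, and anything present only in the current one for insertion, both stamped with the same timestamp. A minimal, self-contained sketch of that core step (the function name compute_deltas and the sample data are hypothetical, and the example's date filtering is omitted):

def compute_deltas(lastData, currentData, bornMillis):
    #entries only in the old snapshot are removed; entries only in
    #the new snapshot are inserted; both map to the same timestamp
    remove = dict((e, bornMillis) for e in lastData if e not in currentData)
    insert = dict((e, bornMillis) for e in currentData if e not in lastData)
    return insert, remove

insert, remove = compute_deltas({"A": 1, "B": 2}, {"B": 2, "C": 3}, 1000)
#insert == {"C": 1000}, remove == {"A": 1000}
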
Code Example #2
def _getDeltas(filepath, source):
    localDir = config.load_source_config(source)["local_dir"]
    lastFilepath = database.getLastProcessedFile(source)
    if lastFilepath is not None:
        lastFilepath = "/".join((os.environ["DATA_DIR"], localDir, lastFilepath))
        
    lastData, lastStartDate, lastBornMillis = _parseFile(lastFilepath)
    currentData, currentStartDate, currentBornMillis = _parseFile(filepath)
    assert (currentStartDate >= lastStartDate)
    
    #get the data that need to be killed, as in example #1
    remove = set()
    for lastDatum in lastData:
        if lastDatum[2] < currentStartDate: #skip entries dated before the current file's start
            continue
        if lastDatum not in currentData:
            remove.add(lastDatum)
            
    #get the data that need to be inserted. similar to above
    insert = set()
    for currentDatum in currentData:
        if currentDatum not in lastData:
            insert.add(currentDatum)

    return insert, remove, currentBornMillis
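
With both snapshots held as sets, the two loops above are equivalent to set differences. A sketch of that shortcut, assuming (as the loop above does) that each datum is a tuple whose third field is its announcement date:

def set_deltas(lastData, currentData, currentStartDate):
    insert = currentData - lastData
    #ignore disappeared entries dated before the current file's start
    remove = set(d for d in lastData - currentData if d[2] >= currentStartDate)
    return insert, remove
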
Code Example #3
def __getDeltas(filepath, source, treatFutureData=False):
    ######get the previously processed file##########

    localDir = config.load_source_config(source)["local_dir"]
    previousData = set()
    previousFileName = database.getLastProcessedFile(source)
    if previousFileName is not None:
        previousFileName = os.environ[
            "DATA_DIR"] + "/" + localDir + "/" + previousFileName
        previousFileInfo = datafiles.read_info_file(previousFileName)
        previousFileDate = previousFileInfo["date_first_present"]

        firstLoad = False
        zf = zipfile.ZipFile(previousFileName)
        names = zf.namelist()
        assert len(names) == 1
        file = zf.open(names[0])

        #skip header
        file.readline()
        for line in file:
            if treatFutureData:
                effectiveDate = line.strip().split("|")[4]
                effectiveDate = dateutil.parser.parse(effectiveDate +
                                                      " 00:00:00.000000 UTC")
                if effectiveDate < previousFileDate:
                    previousData.add(line)
            else:
                previousData.add(line)

        file.close()
        zf.close()
    else:
        firstLoad = True

    ##########get deltas from previous file#############
    currentData = set()
    zf = zipfile.ZipFile(filepath)
    names = zf.namelist()
    assert len(names) == 1
    file = zf.open(names[0])

    #skip header
    file.readline()
    for line in file:
        currentData.add(line)

    file.close()
    zf.close()

    newData = currentData - previousData
    removedData = previousData - currentData

    return (newData, removedData, firstLoad)
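
Both snapshots here come from single-member zip archives read line by line, and the deltas fall out of plain set subtraction. The reading step can be isolated into a small helper; a sketch that keeps the example's one-member assertion and header skip (the helper name is hypothetical):

import zipfile

def read_zip_lines(path):
    zf = zipfile.ZipFile(path)
    try:
        names = zf.namelist()
        assert len(names) == 1  #each archive holds exactly one data file
        f = zf.open(names[0])
        try:
            f.readline()  #skip header
            return set(f)  #one set entry per remaining line
        finally:
            f.close()
    finally:
        zf.close()
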
Code Example #4
File: backoffice.py Project: ryan-leung/ml_monorepo
    def getPositions(self, date):

        PositionSource.getPositions(self, date)

        #Read from the morgan data source
        source = "morgan_positions"
        sourceConfig = config.load_source_config(source)
        dataPath = "{dataPath}/{localDir}/".format(
            dataPath=os.environ["DATA_DIR"],
            localDir=sourceConfig["local_dir"])
        fileRegex = "Tower_Positions\\.{}\\.0000\\.txt".format(
            date.strftime("%d%m%y"))
        fileRegex += r'\.[a-f0-9]{8}$'
        fileRegex = re.compile(fileRegex)

        #get subdirs that could contain the desired file (after the date)
        def candidateSubDir(dir):
            if not os.path.isdir(dataPath + dir):
                return False

            try:
                dirDate = datetime.datetime.strptime(dir, "%Y%m%d")
            except ValueError:
                return False

            return dirDate > date

        candidateSubDirs = sorted(filter(candidateSubDir, os.listdir(dataPath)))  #search earliest dates first

        #Find file with positions
        positionFile = None
        for subDir in candidateSubDirs:

            if positionFile is not None:
                break

            path = dataPath + subDir
            for file in os.listdir(path):
                if fileRegex.match(file):
                    positionFile = dataPath + subDir + "/" + file
                    break

        if positionFile is None:
            raise PositionSourceError(
                "MorganStanleyPosition found no matching files")

        return self._readPositionsFromFile(positionFile)
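
The directory walk works because subdirectories are named YYYYMMDD, so lexical order equals date order. A sketch of just the filter-and-sort step (function and variable names are hypothetical):

import datetime, os

def candidate_dirs(dataPath, date):
    out = []
    for d in os.listdir(dataPath):
        if not os.path.isdir(os.path.join(dataPath, d)):
            continue
        try:
            dirDate = datetime.datetime.strptime(d, "%Y%m%d")
        except ValueError:
            continue  #not a date-named directory
        if dirDate > date:
            out.append(d)
    return sorted(out)  #earliest candidate directory first
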
Code Example #5
    # Check for previously running instance
    if not database.getProcessedFilesLock():
        util.warning("Not processing, previous instance running")
        sys.exit(1)

    #XXX may want to precache seen files for speed in loading
    try:
        for source in options.source.split("+"):
            util.info("Processing source %s" % source)
            from data_sources.file_source import FileSource

            util.info("Indexing new files for %s" % source)
            fs = FileSource()
            files = []
            sconfig = config.load_source_config(source)

            # List files
            fs.cwd("%s/%s" % (os.environ['DATA_DIR'], sconfig['local_dir']))
            if (options.files is not None):
                files_regex = options.files
            elif sconfig["exec_regex"] is not None:
                files_regex = sconfig["exec_regex"]
            else:
                files_regex = sconfig["local_regex"]
            listing = fs.list_recursive(files_regex + r"\.info", sizes=False)

            # Load set of seen files
            util.info("Fetching processed files for %s" % source)
            seen = database.getProcessedFiles(source)
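
The database.getProcessedFilesLock() guard at the top is a single-instance check: if a previous run still holds the lock, exit rather than double-process. A generic file-lock sketch of the same idea (fcntl is POSIX-only, and the lock path is hypothetical):

import fcntl, sys

lock_file = open("/tmp/process_files.lock", "w")
try:
    #non-blocking exclusive lock; raises if another instance holds it
    fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
except IOError:
    sys.stderr.write("Not processing, previous instance running\n")
    sys.exit(1)
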
Code Example #6
File: acquire.py Project: ryan-leung/ml_monorepo
    parser.add_option("-m", "--maxfiles", default=100, dest="maxfiles")
    parser.add_option("-a", "--maxage", default=5, dest="maxage")
    parser.add_option("-d",
                      "--debug",
                      default=False,
                      action="store_true",
                      dest="debug")
    (options, args) = parser.parse_args()

    if options.debug:
        util.set_debug()
    else:
        util.set_log_file(options.source, True)

    lock_f = util.lock(options.source)  #Create a lock
    config = config.load_source_config(
        options.source)  #Load config file for source
    time_file = "%s/%s/%s.time" % (os.environ["DATA_DIR"], config["local_dir"],
                                   options.source)

    util.info("Acquiring data from %s" % options.source)

    try:
        # Read last check time
        try:
            last_time = cPickle.load(open(time_file, 'rb'))
        except IOError:
            last_time = ""

        t = random.random() * 15
        time.sleep(t)
        util.info("Checking (after waiting %ds)" % t)
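
The .time file is simply a pickled marker of the last successful check, read back on the next run; a missing file means the source was never checked. A round-trip sketch (the path and helper names are hypothetical; the Python 2 code above uses cPickle, which pickle replaces in Python 3):

import pickle

def read_last_time(time_file):
    try:
        with open(time_file, "rb") as f:
            return pickle.load(f)
    except IOError:
        return ""  #no previous check recorded, as in the example

def write_last_time(time_file, value):
    with open(time_file, "wb") as f:
        pickle.dump(value, f)
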
Code Example #7
File: data_mon.py Project: ryan-leung/ml_monorepo
def checkAcquireTimestampsAndNewFiles(
        dataCheckFrequency=1L * 24L * 60L * 60L * 1000L,
        defaultNewDataFrequency=1L * 24L * 60L * 60L * 1000L):
    sourceConfigDir = os.environ["CONFIG_DIR"] + "/sources"

    errors = []
    warnings = []
    normal = []

    for sourceConfigFile in os.listdir(sourceConfigDir):
        if sourceConfigFile[-3:] != ".py":
            continue

        sourceName = sourceConfigFile[:-3]
        sc = config.load_source_config(sourceName)

        #Check major errors first
        try:
            sourceLocalDir = os.environ["DATA_DIR"] + "/" + sc["local_dir"]
        except KeyError:  #no local dir, old format
            continue

        if not os.path.exists(sourceLocalDir):
            errors.append("{}: Never checked".format(sourceName))
            continue

        #get time file
        timeFile = sourceLocalDir + "/" + sourceName + ".time"
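
The time file read here acts as a heartbeat: if the acquire job has not touched it within dataCheckFrequency, the source is flagged as stale. A sketch of that staleness test in milliseconds, matching the example's units (the function name is hypothetical):

import os, time

def is_stale(timeFile, dataCheckFrequency):
    #age of the heartbeat file in milliseconds
    ageMillis = (time.time() - os.path.getmtime(timeFile)) * 1000
    return ageMillis > dataCheckFrequency
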
Code Example #8
File: barra.py Project: ryan-leung/ml_monorepo
    file.close()

    #######MAPPING VERIFICATION CODE########
    if verifyOnly:
        return inconcistentMappings
    ########################################


if __name__ == "__main__":
    #amend barra data and add the INDNAME values

    newdb.init_db()
    database = newdb.get_db()
    #collect all the files processed so far
    processedFiles = database.getProcessedFilesTimeOrdered("barra")
    localDir = config.load_source_config("barra")["local_dir"]

    try:
        for file in processedFiles:
            database.start_transaction()
            print datetime.datetime.now(), file
            process("/".join((os.environ["DATA_DIR"], localDir, file)),
                    "barra")
            database.commit()  #commit each file before moving to the next
    except Exception, e:
        print e
        database.rollback()  #undo only the file that failed
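
The replay loop gives each file its own transaction, so a failure rolls back only the file being processed while everything committed earlier survives. The bare shape of that pattern, with db and handler standing in for the example's database handle and process function:

def replay(db, files, handler):
    for f in files:
        db.start_transaction()
        try:
            handler(f)
        except Exception:
            db.rollback()  #undo only the current file
            raise
        db.commit()  #earlier files stay committed
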
Code Example #9
        toDate = fromDate + dayDelta
    elif args.fromDate is not None and args.toDate is not None:
        fromDate = datetime.datetime.strptime(args.fromDate, "%Y%m%d")
        toDate = datetime.datetime.strptime(args.toDate, "%Y%m%d")
    elif args.all is True:
        fromDate = datetime.datetime.strptime("20090910", "%Y%m%d")
        toDate = datetime.datetime.utcnow()
        toDate = datetime.datetime(toDate.year, toDate.month,
                                   toDate.day)  #truncate to midnight
        toDate += dayDelta
    else:
        parser.print_help(util.LOGFILE)
        exit(1)

    #load a relevant config file to get password
    sconfig = config.load_source_config("htb")
    source = SFTPSource(sconfig["host"], sconfig["user"], sconfig["pass"])

    date = fromDate
    while date < toDate:
        try:
            if args.fills:
                remoteDir = "/spare/local/ase/trade/run/live-prod/" + date.strftime(
                    "%Y/%m/%d")
                localDir = args.location + "/" + date.strftime("%Y%m%d")
                source.cwd(remoteDir)
                files = source.list(r".*fills\.txt")
                for file in files:
                    source.copy(remoteDir + "/" + file[0],
                                localDir + "/" + "old." + file[0])
            if args.calcres: