Example no. 1
def savePopDB(dbsInfo):
    #read in the popularity jsons
    myF = fopen(os.path.join(baseDir, 'popDaily.txt'))

    myJInput = ''
    for line in myF:
        myJInput += line

    myJ = json.loads(myJInput)

    dates = myJ.keys()

    #make a dictionary of datasets on disk during the various time intervals
    #divide the number of accesses for each dataset by the number of files in the dataset
    #summing up accesses on all days during the interval.
    #TODO: Protect against data in the popularity jsons that is after the end date of the plot
    datasets = {}
    datasetDetails = {}
    startKeys = dStarts.keys()
    for dateStart in startKeys:
        datasets[dateStart] = {}

    for d in dates:
        dVal = getDatePop(d)

        records = myJ[d]['DATA']

        for record in records:
            dataset = record["COLLNAME"]
            if dataset == testDS:
                print 'Dataset read', d, record["NACC"]
            if dataset not in dbsInfo:
                continue
            if 'num_files' in dbsInfo[dataset]:
                nFiles = dbsInfo[dataset]['num_files']
            else:
                nFiles = dbsInfo[dataset]['nfiles']

            if dataset not in datasetDetails:
                datasetDetails[dataset] = {}
            datasetDetails[dataset][dVal] = datasetDetails[dataset].get(
                dVal, 0) + float(record["NACC"]) / float(nFiles)

            for dateStart in startKeys:
                if dVal >= dStarts[dateStart] and dVal <= dEnd:
                    datasets[dateStart][dataset] = datasets[dateStart].get(
                        dataset, 0) + float(record["NACC"]) / float(nFiles)
    #some printout
    for dateStart in startKeys:
        print len(datasets[dateStart].keys())

    print "popularity for", testDS
    for dateStart in startKeys:
        print dateStart + " :", datasets[dateStart].get(testDS, 0)

    fp = fopen(os.path.join(outputDir, 'popDBDetails.data.gz'), 'wb')
    cPickle.dump(datasetDetails, fp)
    fp.close()
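
Note: every snippet in this listing calls a project-specific fopen helper rather than the builtin open; its definition is not included here. Judging from the mix of plain .txt/.csv paths and .data.gz/.csv.gz paths passed to it, it is most likely a gzip-aware wrapper around open. A minimal sketch, assuming only that behaviour (the real helper may differ):

import gzip

def fopen(fname, mode='r', *args, **kwargs):
    # Hypothetical sketch of the helper used in these examples:
    # use gzip.open() when the path ends in '.gz', otherwise fall back
    # to the builtin open().
    if str(fname).endswith('.gz'):
        return gzip.open(fname, mode, *args, **kwargs)
    return open(fname, mode, *args, **kwargs)

With a wrapper like this, fopen(os.path.join(outputDir, 'popDBDetails.data.gz'), 'wb') above would transparently compress the pickled dictionary, while fopen(dbsInput) in the next example would read a plain CSV unchanged.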
Example no. 2
def readDBSInfo():
    dbsInfo = {}

    print("### use dbsInput %s" % dbsInput)
    with fopen(dbsInput) as istream:
        headers = None
        while True:
            line = istream.readline().replace('\n', '')
            if not line:
                break
            row = line.split(',')
            if not headers:
                headers = row
                continue
            rdict = dict(zip(headers, row))
            dataset = rdict.pop('dataset').replace('"', '')
            for key in rdict.keys():
                rdict[key] = float(rdict[key])
            if 'date' in headers:
                rdict['creation_date'] = datetime.datetime.fromtimestamp(
                    rdict['date'])
            elif 'creation_date' in headers:
                rdict['creation_date'] = datetime.datetime.fromtimestamp(
                    rdict['creation_date'])
            dbsInfo[dataset] = rdict
    return dbsInfo
Example no. 3
def solve(f=None):
    s = f if f else fopen(16).readline().strip()
    b = bin(int(s, 16))[2:]
    b = b.zfill(len(s) * 4)
    p = Parser(b)
    v = p.parse()
    return v
Example no. 4
def run():
    f = fopen(18)
    l = f.readline()
    t = Tree(branches=loads(l))
    for l in f.readlines():
        s = Tree(branches=loads(l))
        t = t.combine_trees(s)
    print(t.magnitude())
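
Note: the puzzle-style snippets (Examples no. 3, 4, 6, 7 and several later ones) call fopen with an integer day number instead of a path, so in that project the helper presumably maps the number to an input file. A minimal sketch under that assumption; the directory layout and file naming below are hypothetical, not taken from the source:

import os

def fopen(day, base_dir='inputs'):
    # Hypothetical: open the puzzle input for the given day.
    # The 'inputs/day16.txt' naming scheme is an assumption.
    return open(os.path.join(base_dir, 'day%d.txt' % day))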
Example no. 5
def savePhedexMeans():
    "Save effectirMeansDict to disk"
    effectiveMeansDict = recreate()
    key = intervalStartStrings['12'] + '_' + intervalEndString

    keysES = effectiveMeansDict.keys()
    for keyES in keysES:
        fname = os.path.join(outputDir,
                             'effectiveMeans' + keyES + '_' + key + '.data.gz')
        print("writing %s: %s" % (keyES, fname))
        fp = fopen(fname, 'wb')
        cPickle.dump(effectiveMeansDict[keyES], fp)
        fp.close()
Example no. 6
def run():
    f = fopen(18)
    ll = f.readlines()
    m = 0
    for i in ll:
        for j in ll:
            if i != j:
                c = Tree(branches=loads(i))
                x = Tree(branches=loads(j))
                c = c.combine_trees(x)
                mm = c.magnitude()
                if mm > m:
                    m = mm
    print(m)
Example no. 7
def read():
    f = fopen(13)
    c = set()
    l = f.readline()
    while l != '\n':
        c.add(tuple(map(int, l.split(','))))
        l = f.readline()
    i = []
    l = f.readline()
    while l:
        a,v = l.split(' ')[2].split('=')
        i.append(tuple([a, int(v)]))
        l = f.readline()
    return (c,i)
Example no. 8
def align_scanners():
    scanners = [Scanner(l) for l in fopen(19).read().split('\n\n')]
    l = set([0])
    scanners[0].position = (0, 0, 0)
    while len(l) < len(scanners):
        for i, s in enumerate(scanners):
            for j, c in enumerate(scanners):
                if s == c or i not in l or j in l:
                    continue
                intersection = s.intersect(c)
                if intersection:
                    s.align(c, intersection)
                    l.add(j)
    return scanners
Example no. 9
def collect_tweets(task, tags):
  '''
  Collect tweets for tag, indefinitely and store in csv files
  '''

  appKeys = kays.appKeys

  with fopen(task, newline='\n', encoding='utf-8') as f:
    keyIdx = 0
    tagIdx = 0

    # writer for csv
    writer = csv.writer(f)

    # save task to log
    writelog(task, tags)

    # collect tweets indefinitely by using all keys
    while True:
      print(time.ctime(), 'Collecting tweets...')
      # get the key
      key = appKeys[keyIdx]

      # create auth and api
      auth = tweepy.OAuthHandler(key['consumerAPIKey'], key['consumerAPISecretKey'])
      auth.set_access_token(key['accessToken'], key['accessTokenSecret'])
      api = tweepy.API(auth)

      # filter out retweets
      query = tags[tagIdx] + ' -filter:retweets'
      count = 0

      # collect tweets and save
      try:
        for tweet in tweepy.Cursor(api.search, q=query).items():
          user = tweet.user

          # escape text
          row = map(esc, [
              tweet.text, tweet.id, user.name, user.screen_name,
              user.location, user.description, user.followers_count,
              user.friends_count, user.listed_count, user.statuses_count,
              user.favourites_count, user.verified,
              user.default_profile_image, user.default_profile,
              user.protected, user.created_at
          ])

          writer.writerow(row)
          count = count+1
      except Exception as e:
        # Wait for 10 mins and then start using next key
        print(time.ctime(), 'Got {} tweets'.format(count))
        # if keyIdx+1 == len(appKeys):
        tagIdx = (tagIdx+1) % len(tags)
        keyIdx = (keyIdx+1) % len(appKeys)
        time.sleep(10 * 60)
Example no. 10
def run(p, op, cmp):
    l = [*fopen(24)]
    s = []
    for i in range(14):
        a = int(l[18 * i + 5].split()[-1])
        b = int(l[18 * i + 15].split()[-1])

        if a > 0:
            s += [(i, b)]
            continue
        j, b = s.pop()

        p = op(p, abs((a + b) * 10**(13 - [i, j][cmp(a, -b)])))

    print(p)
Example no. 11
def setup(fold):
    extra = [['D', 'D'], ['C', 'B'], ['B', 'A'], ['A', 'C']]
    f = list(
        zip(*map(lambda x: x.strip().replace('#', ''),
                 fopen(23).readlines()[2:4])))
    h = [None] * 11
    for i in range(len(h)):
        if i in rooms and f:
            e = []
            if fold:
                e = extra[0]
                extra = extra[1:]
            h[i] = [h[i]] + [f[0][0]] + e + [f[0][1]]
            f = f[1:]
    return h
Example no. 12
def readPhedexMeans():
    "Initialize effectiveMeansDict from data on disk"
    #read in the stored information from disk
    key = intervalStartStrings['12'] + '_' + intervalEndString

    keysES = ["All", "AnaOps", "AllOps", "MinusOne"]

    effectiveMeansDict = {}
    for keyES in keysES:
        fname = os.path.join(outputDir,
                             'effectiveMeans' + keyES + '_' + key + '.data.gz')
        print("reading %s: %s" % (keyES, fname))
        fp = fopen(fname, 'rb')
        effectiveMeansDict[keyES] = cPickle.load(fp)
        fp.close()
    return effectiveMeansDict
Example no. 13
def solve(m=True):
    s = Counter()
    for l in fopen(22).readlines():
        i, c = l.split(' ')
        c = parse(c, m)
        if not c:
            continue
        i = 1 if i == "on" else -1
        u = Counter()
        for e, ie in s.items():
            t = mxmn(c, e)
            if neg(t):
                u[t] -= ie

        if i > 0:
            u[c] += i
        s.update(u)
    print(sum(prod(map(vol, cube)) * v for cube, v in s.items()))
Example no. 14
def solve(first=True):
    f = fopen(4)

    draw, _, *numbers = f.readlines()

    cards = [[]]
    card_index = 0
    for r in numbers:
        if r == '\n':
            cards.append([])
            card_index += 1
            continue
        cards[card_index].append(r.strip().split())

    board = Board(cards)
    for d in draw.split(','):
        winner = board.mark_cards(d)
        if winner and (first or board.is_last_winner()):
            print(winner.calc(int(d)))
            break
Example no. 15
def plotPopularity(arr, keys, popSource, dataLoc, figNum, iformat='png'):

    xVals = numpy.arange(len(arr))
    yByCount = numpy.zeros(len(arr))
    yBySizes = {}
    knownTimes = {}
    knownSamples = {}
    for key in keys:
        yBySizes[key] = numpy.zeros(len(arr))
        sp = key.split()
        knownTimes[sp[0]] = 1
        knownSamples[sp[1]] = 1

    for i, a in enumerate(arr):
        yByCount[i] = (a[0])
        for j, key in enumerate(keys):
            yBySizes[key][i] = a[j + 1]

    width = 0.25
    figNum = figNum + 1
    pylab.figure(figNum)
    pylab.bar(xVals, yByCount, width, color='r')

    pylab.xlabel('Number of accesses', fontsize=15)
    pylab.ylabel('Number of collections', fontsize=15)
    fname = os.path.join(outputDir,
                         "plots/popNum_" + popSource + " " + dataLoc)
    if not os.path.isdir(os.path.join(outputDir, "plots")):
        os.makedirs(os.path.join(outputDir, "plots"))
    pylab.savefig(fname + '.' + iformat, format=iformat)
    pylab.ylim(0, numpy.amax(yByCount) * 1.1)
    ax = pylab.gca()
    ax.set_xticks(xVals + width / 2.)
    ax.set_xticklabels(xVals)

    figNum = figNum + 1
    cols = ['r', 'g', 'b']
    for sample in knownSamples:

        figNum = figNum + 1
        pylab.figure(figNum)
        nBars = 0
        tMaxes = numpy.zeros(len(knownTimes.keys()))
        plottedKeys = []
        for key in keys:
            sp = key.split()
            if sp[1] != sample:
                continue
            nMonths = sp[0]
            plottedKeys.append(key)
            print yBySizes[key]
            print "Sum", key, sample, popSource, dataLoc, numpy.sum(
                yBySizes[key])

            pylab.bar(xVals + nBars * width,
                      yBySizes[key],
                      width,
                      color=cols[nBars],
                      label=nMonths + " months, sum=" +
                      "{0:.1f}".format(numpy.sum(yBySizes[key])))
            tMaxes[nBars] = numpy.amax(yBySizes[key])
            nBars = nBars + 1

        pylab.xlabel('Number of accesses', fontsize=15)
        pylab.ylabel('Weighted total size', fontsize=15)

        pylab.ylim(0, numpy.amax(tMaxes) * 1.1)
        pylab.xlim(0, xVals[-1] + 1)
        ax = pylab.gca()
        ax.set_xticks(xVals + width * 1.5)
        xLabels = ["0 Old"]
        for i in range(len(xVals) - 2):
            xLabels.append(str(i))
        xLabels.append(str(len(xVals) - 2) + "+")

        ax.set_xticklabels(xLabels)
        #        ax.set_xticklabels(xVals)
        pylab.legend(loc='best')
        pylab.title("Samples considered: " + sample + ", popData=" +
                    popSource + " data GID=" + dataLoc)
        fname = os.path.join(
            outputDir,
            "plots/popSize_" + sample + "_" + popSource + "_" + dataLoc)
        if not os.path.isdir(os.path.join(outputDir, "plots")):
            os.makedirs(os.path.join(outputDir, "plots"))
        pylab.savefig(fname + '.' + iformat, format=iformat)

        fname = os.path.join(
            outputDir, "data/popSize_" + sample + "_" + popSource + "_" +
            dataLoc + '.csv.gz')
        if not os.path.isdir(os.path.join(outputDir, "data")):
            os.makedirs(os.path.join(outputDir, "data"))
        fH = fopen(fname, 'w')
        #        fH = fopen(os.path.join(outputDir, "popSize_"+sample+"_"+popSource+"_"+dataLoc+'.csv.gz'),'w')

        fH.write('NAccesses')
        for key in plottedKeys:
            sp = key.split()
            fH.write(',' + sp[0] + ' months (PB)')
        fH.write('\n')
        for k in range(len(xVals)):
            fH.write(str(xLabels[k]))
            for key in plottedKeys:
                fH.write(',' + str(yBySizes[key][k]))
            fH.write('\n')
        fH.close()
    return figNum
Example no. 16
def saveClassAds(dbsInfo):
    "Generate class ads information and save it on disk"
    datasets = {}
    datasetDetails = {}
    startKeys = dStarts.keys()
    for dateStart in startKeys:
        datasets[dateStart] = {}

    print("### use classAdsInput %s" % classAdsInput)

    for root, dirs, files in os.walk(classAdsInput, topdown=False):
        for idx, name in enumerate(sorted(files)):
            nNull = 0
            nNonNull = 0
            nNullEvts = 0
            fName = os.path.join(root, name)
            headers = []
            skip = False
            with fopen(fName, 'rb') as istream:
                for line in istream:
                    sp = line.split(',')
                    if not headers:
                        headers = sp
                        continue
                    # check if it is dataset-YYYYMMDD.csv file
                    check = ('sum_evts' in headers) or\
                            ('num_events' in headers) or\
                            ('nevents' in headers)
                    if not check:
                        print("Skip %s" % fName)
                        skip = True
                        break
                    if len(sp) < 7:  #need at least 7 columns; sp[6] is read below
                        continue
                    if sp[0] == "null":
                        nNull += 1
                        continue
                    if sp[6] == "null":
                        nNullEvts += 1
                        continue
                    if sp[0] == "dataset": continue
                    nNonNull += 1
                    try:
                        ts = long(sp[5])
                    except:
                        continue
                    if ts > 25180904520:  #it's in milliseconds!
                        ts = long(ts / 1000)
                    #there are also bogus timestamps - some are easy to recover
                    while ts > 25180904520:
                        ts = long(ts / 1000)
                    try:
                        dVal = datetime.datetime.fromtimestamp(
                            ts).date()  #getDate(sp[8])
                    except ValueError:
                        print 'skipping bad timestamp', ts, line
                        continue
                    dataset = sp[0]
                    if "/DQMIO" in dataset:
                        continue  #there are no events...
                    if dataset not in dbsInfo:
                        continue
                    if 'num_events' in dbsInfo[dataset]:
                        nEvts = dbsInfo[dataset]["num_events"]
                    elif 'nevents' in dbsInfo[dataset]:
                        nEvts = dbsInfo[dataset]["nevents"]
                    elif 'sum_evts' in dbsInfo[dataset]:
                        nEvts = dbsInfo[dataset]["sum_evts"]
                    else:
                        continue  #no event count available for this dataset
                    if float(nEvts) < 1:
                        nEvts = 1.
                    if dataset not in datasetDetails:
                        datasetDetails[dataset] = {}
                    datasetDetails[dataset][
                        dVal] = datasetDetails[dataset].get(
                            dVal, 0) + float(sp[6]) * 1000 / float(nEvts)

                    for dateStart in startKeys:
                        if dVal >= dStarts[dateStart] and dVal <= dEnd:
                            datasets[
                                dateStart][dataset] = datasets[dateStart].get(
                                    dataset,
                                    0) + float(sp[6]) * 1000 / float(nEvts)

            if not skip:
                try:
                    print("%3d %s %s %s" %
                          (idx, name,
                           nNull / float(nNonNull + nNull + nNullEvts + 1e-5),
                           nNullEvts /
                           float(nNonNull + nNull + nNullEvts + 1e-5)))
                except:
                    pass

    #make a dictionary of datasets on disk during the various time intervals
    #divide the number of accesses for each dataset by the number of files in the dataset
    #summing up accesses on all days during the interval.
    #TODO: Protect against data in the popularity jsons that is after the end date of the plot

    fp = fopen(os.path.join(outputDir, 'classads.data.gz'), 'wb')
    cPickle.dump(datasets, fp)
    fp.close()

    fp = fopen(os.path.join(outputDir, 'classadsDetails.data.gz'), 'wb')
    cPickle.dump(datasetDetails, fp)
    fp.close()
Example no. 17
def readClassAds():
    "Read class ads data and return back datasets dict"
    fp = fopen(os.path.join(outputDir, 'classads.data.gz'), 'rb')
    datasets = cPickle.load(fp)
    fp.close()
    return datasets
Example no. 18
def readPopDB():
    "Read PopDB data and return back datasets dict"
    fp = fopen(os.path.join(outputDir, 'popDBDetails.data.gz'), 'rb')
    datasets = cPickle.load(fp)
    fp.close()
    return datasets
Example no. 19
default_pattern = [
    'abcefg', 'cf', 'acdeg', 'acdfg', 'bdcf', 'abdfg', 'abdefg', 'acf',
    'abcdefg', 'abcdfg'
]


def pattern_decoder(p):
    return Counter(''.join(p))


def translate(s, decoder):
    return tuple(sorted([decoder[x] for x in s]))


def calc(i, o):
    decoder = pattern_decoder(i)
    return int(''.join(map(str, [t[translate(x, decoder)] for x in o])))


def get_output(line):
    return calc(*[x.strip().split(' ') for x in line])


t = {}
default_count = pattern_decoder(default_pattern)
for i, x in enumerate(default_pattern):
    k = translate(x, default_count)
    t[k] = i

print(sum([get_output(e.split('|')) for e in fopen(8).readlines()]))
Example no. 20
def shoot(xv, yv, f):
    xs, xe = f['x']
    ys, ye = f['y']
    cx, cy = 0, 0
    my = 0
    while cx < xe and cy not in range(ys, ye):
        cx += xv
        cy += yv
        if cy > my:
            my = cy
        yv -= 1
        if cx in range(xs, xe) and cy in range(ys, ye):
            return my
        if cx > xe:
            return None
    return 0


def get_max_y(f):
    my = 0
    for x in range(1, 100):
        for y in range(1, 100):
            high = shoot(x, y, f)
            if high and high > my:
                my = high
    return my


f = dict([parse(c) for c in fopen(17).readline()[13:].split(', ')])
print(get_max_y(f))
Example no. 21
def plots(phedexInfo, dbsInfo, classadsInfo, iformat):
    "Generate popularity plots from phedex/dbs/classads dicts"
    #file to dump dataset by dataset tallies

    fDump = fopen(os.path.join(outputDir, 'dumpIt.txt.gz'), 'w')

    keyInfos = {}
    for key in cbs_keys:
        #the plot has two attributes, the time period and the data sample
        ageKey = findAgeKey(key)
        spKey = spKeys[key]
        dsKey = (spKey[0])
        keyInfos[key] = [ageKey, dsKey]

    #loop over datasets known to phedex
    for dataset in phedexInfo['All']:
        #get attributes from DBS - some data sets are missing - these
        #tend to be test datasets
        dbsDataset = dbsInfo.get(
            dataset, None)  #it is likely invalid data if it's not here

        # here dataset is like /GluGluToHToZZTo4L_M-125_13TeV-powheg-pythia6/Phys14DR-PU20bx25_tsg_PHYS14_25_V1-v1/GEN-SIM-RAW
        # and dbsDataset is {'nfiles': 3199.0, 'nevents': 205484.0, 'size': 309328826257.0}
        if dbsDataset is not None:
            if 'creation_date' not in dbsDataset:
                continue
            ageDataset = dbsDataset['creation_date'].date()
            if 'dataset_size' in dbsDataset:
                sizeDataset = float(dbsDataset['dataset_size'])
            elif 'size' in dbsDataset:
                sizeDataset = float(dbsDataset['size'])
            else:
                sizeDataset = None  #size not known to DBS for this dataset
        else:
            ageDataset = None
            sizeDataset = None

        spDataset = dataset.split('/')
        # caching variables to avoid extensive lookups

        cacheES = {}
        for cat in dataCategories:
            cacheES[cat] = phedexInfo[cat].get(dataset, None)

        #loop over the set of plots to make
        for key, valinfo in keyInfos.iteritems():
            #the plot has two attributes, the time period and the data sample
            ageKey = valinfo[0]  #findAgeKey(key)
            dsKey = valinfo[1]  #(spKey[0])
            #skip this plot if the dataset is not part of it
            if not interestingDataset(key, spDataset):
                continue

            #look up the average size of the dataset for this time period
            for cat in dataCategories:  #range(3):
                m = 0
                #            if "All" in cacheES:
                #                if dsKey in cacheES["All"]:
                #                    m=cacheES["All"][dsKey]
                #right
                if cat in cacheES:
                    if dsKey in cacheES[cat]:
                        m = cacheES[cat][dsKey]

                nCopies = 1.
                counter = 0.

                #optionally compute the number of copies (on average) of the dataset on disk
                #by comparing its average size to the size in dbs
                #again only works if dataset is known to dbs
                if divideByNCopies and (sizeDataset is not None):
                    nCopies = m / sizeDataset

                #compute the average number of times each file in the dataset is accessed
                #using popularity data and copies on disk
                #divide but protect against case where the dataset is not on disk
                if nCopies > 0.:
                    counter = (classadsInfo[dsKey].get(dataset, 0)) / nCopies
                else:
                    counter = (classadsInfo[dsKey].get(dataset,
                                                       0))  #this ought to be 0

                if counter > 0 and counter < 1:
                    # any access to the dataset counts - so round up
                    counter = 1
                else:
                    # otherwise round to the nearest integer value
                    counter = round(counter)

                    #distinguish the 0 bin between old and new datasets based on the
                    #age of the dataset
                    if counter == 0:
                        if (ageDataset is not None) and (ageDataset < ageKey):
                            counter = -1  # 0 old

                #cut off the plot at the desired value
                if counter > maxPop:
                    counter = maxPop

                #store the results
                countsBySizeDict[cat][key][
                    counter] = countsBySizeDict[cat][key].get(counter, 0.) + m

                #test and printouts
                if dataset == testDS:
                    print "Pop counter for", key, cat, "is", counter

                if cat == "AllOps":
                    fDump.write('%5d %7.5f %15s %s \n' %
                                (counter, m, key, dataset))

    fDump.close()

    #tally up all the information
    sumsDict = computeSums()

    #plot everything
    import plotter

    figNum = 0

    for cat in dataCategories:  #range(3):
        figNum = \
            plotter.plotPopularity(sumsDict[cat], cbs_keys, popularitySource, cat+'_'+popularitySource, figNum, iformat)
Example no. 22
def readSizes():
    #replica level information
    phedexInfo = {}
    #dataset level information
    phedexDatasetInfo = {}

    #site,dataset,rdate,gid,min_date,max_date,ave_size,max_size,days
    colsPhedex = {
        "site": -1,
        "dataset": -1,
        "rdate": -1,
        "min_date": -1,
        "max_date": -1,
        "ave_size": -1,
        "max_size": -1,
        "days": -1,
        "gid": -1
    }

    colPhedexNames = colsPhedex.keys()

    nCount = 0
    print("### use phedexDataFile %s" % phedexDataFile)
    print("### testDS %s" % testDS)
    istream = fopen(phedexDataFile)
    for l in istream:
        nCount = nCount + 1
        #optionally test things on a subset of data
        if isTest and nCount > 10000:
            print "Incomplete data as you are just testing"
            break
        sp = l.strip().split(',')
        #use the first row to understand the set of columns
        #stop if the data is not in the expected format
        if nCount == 1:
            for col in colPhedexNames:
                for i in range(0, len(sp)):
                    if col == sp[i]: colsPhedex[col] = i
                if colsPhedex[col] == -1:
                    print "missing column", col
                    print("File: %s" % phedexDataFile)
                    sys.exit(1)


            # print("### colsPhedex", colsPhedex)
        else:
            #create the dictionaries from the phedex csvs
            dataset = sp[colsPhedex["dataset"]]
            site = sp[colsPhedex["site"]]
            rdate = sp[colsPhedex["rdate"]]
            gid = sp[colsPhedex["gid"]]
            #skip anything that is relval
            if 'RelVal' in dataset: continue
            key = (dataset, site, rdate, gid)
            #should become try: blah except: blah
            if dataset not in phedexDatasetInfo:
                phedexDatasetInfo[dataset] = []
            #this can then be used to look up detailed information in phedexInfo dictionary
            phedexDatasetInfo[dataset].append((site, rdate, gid))

            datum = {}
            for col in colPhedexNames:
                if col == "site": continue
                if col == "dataset": continue
                datum[col] = sp[colsPhedex[col]]
            #catch errors - there should never be a repeated key
            if key in phedexInfo:
                print "Duplicated key"
                print key
                print sp
                print phedexInfo[key]
                sys.exit(1)
            #done, just store everything..
            phedexInfo[key] = datum
            if testDS in key:
                print("### testDS", key, datum)

    istream.close()

    replicas = phedexInfo.keys()
    nRep = len(replicas)

    #now make dataset level arrays that contain day-by-day size on T1/T2 disk
    #do that for analysis ops and comp ops and gid=-1 (which is a nonsense value)
    esDictKeys = ["All", "AnaOps", "AllOps", "MinusOne"]
    effectiveSizesDict = {}
    effectiveSizesFunc = {}
    for key in esDictKeys:
        effectiveSizesDict[key] = {}
        method = "is" + key
        effectiveSizesFunc[key] = globals()[method]

    print("phedexDatasetInfo", len(phedexDatasetInfo.keys()), "size",
          object_size(phedexDatasetInfo))

    #loop over dataset and replicas
    for dataset, keyInfos in phedexDatasetInfo.iteritems():
        #again, skip relvals here - even if there should be none
        if "/RelVal" in dataset:
            continue
        #create the arrays
        cacheES = {k: numpy.zeros(nDays) for k in esDictKeys}
        for key, val in cacheES.iteritems():
            effectiveSizesDict[key][dataset] = val

        #get the list of replicas for this dataset
        #keyInfos=phedexDatasetInfo[dataset]
        #loop over them
        for keyInfo in keyInfos:
            site = keyInfo[0]
            #skip things that are not T1 or T2
            if not site.startswith("T1") and not site.startswith("T2"):
                continue
            if not use_only_tier2 and not site.startswith("T2"):
                continue

            #get the detailed phedex information for this replica
            phKey = (dataset, ) + keyInfo
            phDatum = phedexInfo[phKey]
            d1 = getDate(phDatum["min_date"])
            d2 = getDate(phDatum["max_date"])

            #compute the range of days that this replica was on disk
            indEnd = (d2 - dStartOldest).days if d2 < dEnd else nDays - 1
            if indEnd < 0:
                continue  #sample was gone before the period we are looking at
            indStart = (d1 - dStartOldest).days if d1 > dStartOldest else 0

            #just some printouts for debugging if you want them
            if dataset == testDS:
                print site, phKey, phDatum
                print d1, d2
                print "start and end", indStart, indEnd
                print float(phDatum['ave_size'])

            #set the daily size to the average seen in the phedex dumps
            for key, val in effectiveSizesFunc.iteritems():
                if val(keyInfo):
                    cacheES[key][indStart:indEnd + 1] += float(
                        phDatum['ave_size'])
    return effectiveSizesDict
Example no. 23
def solve(n):
    img = Image(*fopen(20).read().split('\n\n'))
    img.tr(n)
    img.count()