Esempio n. 1
0
def _runWithRetries(command, failureMessage, attempts=30, delaySec=60.0):
    """Run a shell command through runCommand, retrying on RuntimeError.

    After every failed attempt message('  retry...') is emitted and the
    call sleeps for delaySec seconds.  Returns as soon as one attempt
    succeeds; raises RuntimeError(failureMessage) once every attempt has
    failed.
    """
    for _ in range(attempts):
        try:
            runCommand(command)
        except RuntimeError:
            message('  retry...')
            time.sleep(delaySec)
        else:
            return
    raise RuntimeError(failureMessage)


def main():
    """Update the checkouts, rebuild, and compile the nightly competitor.

    Steps, in order:
      1. `hg pull -u` inside constants.BENCH_BASE_DIR (retried).
      2. `svn cleanup` + `svn update` inside BASE_DIR/NIGHTLY_DIR (retried);
         the SVN revision is parsed out of update.log with reSVNRev.
      3. `clean` and `compile` targets of constants.ANT_EXE.
      4. Build the 'wikimedium' data source, a Competition, its index and
         competitor, and compile the competitor with benchUtil.RunAlgs.

    Raises:
      RuntimeError: if `hg pull -u` or `svn update` still fails after 30
        attempts, or if the hg output contains 'not updating'.
    """

    # Kept as an always-on switch, as in the original code.
    if True:
        os.chdir(constants.BENCH_BASE_DIR)
        _runWithRetries('hg pull -u > hgupdate.log',
                        'failed to run hg pull -u')
        with open('hgupdate.log', 'r') as f:
            s = f.read()
        # A pull that succeeded but left the working copy untouched is
        # treated as fatal (no retry).
        if s.find('not updating') != -1:
            raise RuntimeError('hg did not update: %s' % s)

        os.chdir('%s/%s' % (constants.BASE_DIR, NIGHTLY_DIR))

        runCommand('svn cleanup')
        with open('update.log', 'ab') as f:
            f.write('\n\n[%s]: update' % datetime.datetime.now())
        _runWithRetries('svn update > update.log 2>&1', 'svn update failed')
        with open('update.log', 'rb') as f:
            svnRev = int(reSVNRev.search(f.read()).group(1))
        # Single parenthesised argument: prints the same text whether
        # `print` is a statement or a function.
        print('SVN rev is %s' % svnRev)

    runCommand('%s clean > clean.log 2>&1' % constants.ANT_EXE)
    runCommand('%s compile > compile.log 2>&1' % constants.ANT_EXE)

    MEDIUM_LINE_FILE = constants.NIGHTLY_MEDIUM_LINE_FILE
    MEDIUM_INDEX_NUM_DOCS = constants.NIGHTLY_MEDIUM_INDEX_NUM_DOCS

    mediumSource = competition.Data('wikimedium', MEDIUM_LINE_FILE,
                                    MEDIUM_INDEX_NUM_DOCS,
                                    constants.WIKI_MEDIUM_TASKS_FILE)

    comp = competition.Competition()
    index = comp.newIndex(NIGHTLY_DIR, mediumSource)
    # NOTE(review): no local `id` is assigned in this function, so unless the
    # module defines one this passes the builtin id() function -- confirm.
    c = comp.competitor(id, NIGHTLY_DIR, index=index)
    r = benchUtil.RunAlgs(constants.JAVA_COMMAND, True)
    r.compile(c)
Esempio n. 2
0
    # Excerpt from the body of a larger function; `sourceData` is defined
    # above this excerpt.

    # Competition created with a fixed random seed of 0.
    comp = competition.Competition(randomSeed=0)

    # Index definition over sourceData for the trunk checkout, with the
    # postings formats and feature switches spelled out below.
    index = comp.newIndex(
        constants.TRUNK_CHECKOUT,
        sourceData,
        postingsFormat='Lucene90',
        idFieldPostingsFormat='Memory',
        grouping=False,
        doDeletions=False,
        addDVFields=True,
    )

    # Single competitor named 'base' on the trunk checkout.
    c = competition.Competitor('base', constants.TRUNK_CHECKOUT)

    # Compile the competitor, then build the index under the competitor's name.
    # NOTE(review): the meaning of the positional False flags passed to
    # RunAlgs and makeIndex is not visible in this excerpt -- confirm
    # against benchUtil.
    r = benchUtil.RunAlgs(constants.JAVA_COMMAND, False, False)
    r.compile(c)
    r.makeIndex(c.name, index, False)

    # Classpath string for the checkout and the path of the index directory.
    cp = '%s' % r.classPathToString(benchUtil.getClassPath(c.checkout))
    fip = '%s/index' % benchUtil.nameToIndexPath(index.getName())
    # Run parameters looked up by flag name with a fallback value.
    # NOTE(review): presumably benchUtil.getArg reads these from the command
    # line; the third argument's meaning is not visible here -- confirm.
    # Note that the '-dps' and '-rps' fallbacks are strings while the
    # '-rts' and '-nst' fallbacks are ints.
    modes = benchUtil.getArg('-mode', 'update', True)
    docsPerSec = benchUtil.getArg('-dps', '1', True)
    reopenPerSec = benchUtil.getArg('-rps', '0.2', True)
    runTimeSec = benchUtil.getArg('-rts', 60, True)
    numSearchThreads = benchUtil.getArg('-nst', 1,
                                        True)  # default to 1 search thread
    numIndexThreads = benchUtil.getArg('-nit', constants.INDEX_NUM_THREADS,
                                       True)

    # `modes` is a comma-separated list; each entry is handled in turn (the
    # loop body lies beyond this excerpt).
    for mode in modes.split(','):
Esempio n. 3
0
def _taskFilterDescription(pos, neg, tasksFile):
    """Return the 'tasks file: ...' status line for the given filters."""
    if pos is None:
        if neg is None:
            return '    tasks file: %s' % tasksFile
        return '    tasks file: NOT %s from %s' % (','.join(neg), tasksFile)
    if neg is None:
        return '    tasks file: %s from %s' % (','.join(pos), tasksFile)
    return '    tasks file: %s, NOT %s from %s' % (','.join(pos),
                                                   ','.join(neg), tasksFile)


def _writeFilteredTasksFile(srcPath, dstPath, pos, neg):
    """Copy srcPath to dstPath, dropping tasks rejected by the filters.

    The category of a line is the text before its first ':'.  A line with
    a category is kept only if it matches at least one regex in `pos`
    (when pos is not None) and matches no regex in `neg` (when neg is not
    None).  Lines without ':' are always copied.  Output is written as
    UTF-8 bytes.
    """
    posPatterns = None if pos is None else [re.compile(x) for x in pos]
    negPatterns = None if neg is None else [re.compile(x) for x in neg]

    with open(srcPath) as f, open(dstPath, 'wb') as fOut:
        for line in f:
            i = line.find(':')
            if i != -1:
                cat = line[:i]
                if posPatterns is not None and not any(
                        p.search(cat) is not None for p in posPatterns):
                    continue
                if negPatterns is not None and any(
                        p.search(cat) is not None for p in negPatterns):
                    continue
            fOut.write(line.encode('utf-8'))


def run(id,
        base,
        challenger,
        coldRun=False,
        doCharts=False,
        search=False,
        index=False,
        verifyScores=True,
        verifyCounts=True,
        taskPatterns=None,
        randomSeed=None,
        requireOverlap=1.0):
    """Compile, optionally index, and search-benchmark base vs challenger.

    Args:
      id: identifier forwarded to makeIndex, getSearchLogFiles,
        runSimpleSearchBench and getAggregateProfilerResult.
      base, challenger: the two competitors; processed in the order
        [challenger, base].
      coldRun: forwarded to runSimpleSearchBench.
      doCharts: forwarded to makeIndex.
      search: run the search benchmark; also enabled by '-search' in
        sys.argv.
      index: build the competitors' indices; also enabled by '-index' in
        sys.argv.
      verifyScores, verifyCounts: forwarded to benchUtil.RunAlgs.
      taskPatterns: (pos, neg) pair; each element is None or a list of
        regex strings matched against task categories.  None is treated
        as (None, None), i.e. no filtering.
      randomSeed: required seed for the per-run random seeds.
      requireOverlap: lower bound for cmpDiffs[2] after each iteration.

    Raises:
      RuntimeError: if randomSeed is None, if competitors disagree on
        their tasks file, if segment counts differ for equal commit
        points, or if the report signals errors or differing results.

    Compilation is skipped when '-noc' is in sys.argv; '-jira' and '-html'
    in sys.argv are forwarded to simpleReport.
    """
    competitors = [challenger, base]

    if randomSeed is None:
        raise RuntimeError('missing randomSeed')

    r = benchUtil.RunAlgs(constants.JAVA_COMMAND, verifyScores, verifyCounts)
    if '-noc' not in sys.argv:
        print()
        print('Compile:')
        for c in competitors:
            r.compile(c)

    if not search:
        search = '-search' in sys.argv
    if not index:
        index = '-index' in sys.argv

    if index:
        seen = set()
        indexSegCount = None
        indexCommit = None
        printedHeader = False
        tasksFile = None
        for c in competitors:
            if tasksFile is None:
                tasksFile = c.tasksFile
            elif tasksFile != c.tasksFile:
                raise RuntimeError('inconsistent taskFile %s vs %s' %
                                   (tasksFile, c.tasksFile))
            # Competitors may share an index; build each one only once.
            if c.index not in seen:
                if not printedHeader:
                    print()
                    print('Create indices:')
                    printedHeader = True
                seen.add(c.index)
                r.makeIndex(id, c.index, doCharts)
                segCount = benchUtil.getSegmentCount(
                    benchUtil.nameToIndexPath(c.index.getName()))
                if indexSegCount is None:
                    indexSegCount = segCount
                    indexCommit = c.commitPoint
                elif indexCommit == c.commitPoint and indexSegCount != segCount:
                    raise RuntimeError(
                        'segment counts differ across indices: %s vs %s' %
                        (indexSegCount, segCount))

    if search:
        if taskPatterns is None:
            taskPatterns = (None, None)
        pos, neg = taskPatterns

        # Read the tasks file from `base` directly so this branch does not
        # depend on loop variables left over from the compile/index steps.
        tasksFile = base.tasksFile
        newTasksFile = None

        try:
            if taskPatterns != (None, None):
                print(_taskFilterDescription(pos, neg, tasksFile))
                # Per-process temp file; removed in the finally clause.
                newTasksFile = '%s/%s.tasks' % (constants.BENCH_BASE_DIR,
                                                os.getpid())
                _writeFilteredTasksFile(tasksFile, newTasksFile, pos, neg)
                for c in competitors:
                    c.tasksFile = newTasksFile
            else:
                print('    tasks file: %s' % tasksFile)

            results = {}

            if ' -ea' in constants.JAVA_COMMAND:
                print()
                print(
                    'WARNING: *** assertions are enabled *** JAVA_COMMAND=%s' %
                    constants.JAVA_COMMAND)
                print()

            print()
            print('Search:')

            # staticSeed is drawn once; `seed` is redrawn for every iteration.
            rand = random.Random(randomSeed)
            staticSeed = rand.randint(-10000000, 1000000)

            # Remove old log files:
            for c in competitors:
                for fileName in r.getSearchLogFiles(id, c):
                    if os.path.exists(fileName):
                        os.remove(fileName)

            for jvmIter in range(base.competition.jvmCount):

                print('  iter %d' % jvmIter)

                seed = rand.randint(-10000000, 1000000)

                for c in competitors:
                    print('    %s:' % c.name)
                    logFile = r.runSimpleSearchBench(jvmIter,
                                                     id,
                                                     c,
                                                     coldRun,
                                                     seed,
                                                     staticSeed,
                                                     filter=None,
                                                     taskPatterns=taskPatterns)
                    results.setdefault(c, []).append(logFile)

                print()
                print('Report after iter %d:' % jvmIter)
                details, cmpDiffs, cmpHeap = r.simpleReport(
                    results[base],
                    results[challenger],
                    '-jira' in sys.argv,
                    '-html' in sys.argv,
                    cmpDesc=challenger.name,
                    baseDesc=base.name)
                if cmpDiffs is not None:
                    if cmpDiffs[1]:
                        raise RuntimeError('errors occurred: %s' %
                                           str(cmpDiffs))
                    if cmpDiffs[2] < requireOverlap:
                        raise RuntimeError('results differ: %s' %
                                           str(cmpDiffs))

        finally:
            if newTasksFile is not None and os.path.exists(newTasksFile):
                os.remove(newTasksFile)

        # TODO: maybe print this after each iter, not just in the end, for the impatient/progressive?
        for mode in 'cpu', 'heap':
            for c in competitors:
                print(f'\n{mode.upper()} merged search profile for {c.name}:')
                print(c.getAggregateProfilerResult(id, mode)[0][1])

    else:
        # No search run requested: report from the existing log files.
        results = {}
        for c in competitors:
            results[c] = r.getSearchLogFiles(id, c)

        details, cmpDiffs, cmpHeap = r.simpleReport(results[base],
                                                    results[challenger],
                                                    '-jira' in sys.argv,
                                                    '-html' in sys.argv,
                                                    cmpDesc=challenger.name,
                                                    baseDesc=base.name)
        # NOTE(review): unlike the search branch, any non-None cmpDiffs is
        # fatal here (requireOverlap is not consulted) -- confirm intended.
        if cmpDiffs is not None:
            raise RuntimeError('results differ: %s' % str(cmpDiffs))
Esempio n. 4
0
# MAX_BUFFERED_DOCS = 49774
# INDEXING_BUFFER_MB = -1

MAX_BUFFERED_DOCS = 5000
INDEXING_BUFFER_MB = -1

BODY_FIELD_TERM_VECTORS = True

# DOC_COUNT = 27625038
# DOC_COUNT = 100000
DOC_COUNT = 10000000

print('Compile luceneutil and %s/%s...' %
      (constants.BASE_DIR, LUCENE_TRUNK_ROOT))
r = benchUtil.RunAlgs(JAVA_CMD, False)
c = competition.Competitor('foo', LUCENE_TRUNK_ROOT)
c.compile(r.classPathToString(r.getClassPath(c.checkout)))

while True:
    print
    print('%s: create index' % datetime.datetime.now())

    shutil.rmtree(INDEX_PATH)

    cmd = '%s -classpath "ROOT/lucene/build/core/classes/java:ROOT/lucene/build/core/classes/test:ROOT/lucene/build/sandbox/classes/java:ROOT/lucene/build/misc/classes/java:ROOT/lucene/build/facet/classes/java:/home/mike/src/lucene-c-boost/dist/luceneCBoost-SNAPSHOT.jar:ROOT/lucene/build/analysis/common/classes/java:ROOT/lucene/build/analysis/icu/classes/java:ROOT/lucene/build/queryparser/classes/java:ROOT/lucene/build/grouping/classes/java:ROOT/lucene/build/suggest/classes/java:ROOT/lucene/build/highlighter/classes/java:ROOT/lucene/build/codecs/classes/java:ROOT/lucene/build/queries/classes/java:lib/HdrHistogram.jar:build" perf.Indexer -dirImpl MMapDirectory -indexPath "%s" -analyzer StandardAnalyzerNoStopWords -lineDocsFile %s -docCountLimit %s -threadCount %d -maxConcurrentMerges 3 -dvfields -ramBufferMB %s -maxBufferedDocs %d -postingsFormat Lucene50 -waitForMerges -mergePolicy LogDocMergePolicy -facets Date -facetDVFormat Lucene50 -idFieldPostingsFormat Memory'.replace(
        'ROOT', '%s/%s' % (constants.BASE_DIR, LUCENE_TRUNK_ROOT)) % (
            JAVA_CMD, INDEX_PATH, LINE_DOCS_FILE, DOC_COUNT, INDEX_THREADS,
            INDEXING_BUFFER_MB, MAX_BUFFERED_DOCS)

    if USE_CMS: