Example #1
    def _get_queue(self, row):
        # RETURN THE INSERT QUEUE FOR THE ROLLOVER PERIOD COVERING row, CREATING THE INDEX IF REQUIRED
        row = wrap(row)
        if row.json:
            row.value, row.json = convert.json2value(row.json), None
        timestamp = Date(self.rollover_field(row.value))
        if timestamp == None or timestamp < Date.today() - self.rollover_max:
            # TOO OLD TO ACCEPT; CALLER GETS Null
            return Null

        rounded_timestamp = timestamp.floor(self.rollover_interval)
        queue = self.known_queues.get(rounded_timestamp.unix)
        if queue == None:
            candidates = jx.run({
                "from": self.cluster.get_aliases(),
                "where": {
                    "regex": {
                        "index":
                        self.settings.index + "\d\d\d\d\d\d\d\d_\d\d\d\d\d\d"
                    }
                },
                "sort": "index"
            })
            # TRACK THE NEWEST EXISTING INDEX THAT STARTS ON OR BEFORE timestamp
            best = None
            for c in candidates:
                c = wrap(c)
                c.date = unicode2Date(c.index[-15:],
                                      elasticsearch.INDEX_DATE_FORMAT)
                if timestamp > c.date:
                    best = c
            if not best or rounded_timestamp > best.date:
                if rounded_timestamp < wrap(candidates[-1]).date:
                    # A NEWER INDEX ALREADY EXISTS; WRITE INTO best INSTEAD OF CREATING ONE
                    es = elasticsearch.Index(read_only=False,
                                             alias=best.alias,
                                             index=best.index,
                                             settings=self.settings)
                else:
                    try:
                        es = self.cluster.create_index(
                            create_timestamp=rounded_timestamp,
                            settings=self.settings)
                        es.add_alias(self.settings.index)
                    except Exception as e:
                        # ANOTHER WRITER MAY HAVE CREATED THE INDEX FIRST
                        if "IndexAlreadyExistsException" not in e:
                            Log.error("Problem creating index", cause=e)
                        return self._get_queue(row)  # TRY AGAIN
            else:
                # best ALREADY COVERS THIS ROLLOVER PERIOD
                es = elasticsearch.Index(read_only=False,
                                         alias=best.alias,
                                         index=best.index,
                                         settings=self.settings)

            with suppress_exception:
                # BEST EFFORT: SLOW THE REFRESH RATE ON THE TARGET INDEX
                es.set_refresh_interval(seconds=60 * 10, timeout=5)

            self._delete_old_indexes(candidates)

            queue = self.known_queues[rounded_timestamp.unix] = es.threaded_queue(
                max_size=self.settings.queue_size,
                batch_size=self.settings.batch_size,
                silent=True
            )
        return queue
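The pattern worth taking from this example is the time-bucketed queue cache: round the record's timestamp down to the rollover interval, then lazily create one insert queue per bucket. A minimal self-contained sketch of that idea using only the standard library; ROLLOVER_INTERVAL, floor_to_interval, and the plain-list "queue" are illustrative stand-ins, not names from the original code:

    from datetime import datetime, timedelta, timezone

    ROLLOVER_INTERVAL = timedelta(days=7)   # ILLUSTRATIVE; THE ORIGINAL READS ITS INTERVAL FROM settings
    EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
    known_queues = {}                       # ROUNDED UNIX TIMESTAMP -> QUEUE

    def floor_to_interval(ts):
        # ROUND ts DOWN TO THE START OF ITS ROLLOVER PERIOD
        periods = (ts - EPOCH) // ROLLOVER_INTERVAL
        return EPOCH + periods * ROLLOVER_INTERVAL

    def get_queue(ts):
        # LAZILY CREATE ONE QUEUE PER ROLLOVER PERIOD, AS _get_queue DOES ABOVE
        rounded = floor_to_interval(ts)
        queue = known_queues.get(rounded.timestamp())
        if queue is None:
            # THE ORIGINAL BUILDS es.threaded_queue(...) HERE; A LIST STANDS IN
            queue = known_queues[rounded.timestamp()] = []
        return queue

    q = get_queue(datetime(2016, 3, 5, tzinfo=timezone.utc))
    q.append({"example": "record"})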
Example #2
def _delete_old_indexes(self, candidates):
    # DELETE ANY INDEX WHOSE ENTIRE ROLLOVER PERIOD IS OLDER THAN THE RETENTION HORIZON
    for c in candidates:
        timestamp = unicode2Date(c.index[-15:], "%Y%m%d_%H%M%S")
        if timestamp + self.rollover_interval < Date.today() - self.rollover_max:
            # Log.warning("Will delete {{index}}", index=c.index)
            try:
                self.cluster.delete_index(c.index)
            except Exception as e:
                Log.warning("could not delete index {{index}}", index=c.index, cause=e)
Example #3
def loop(source, coverage_summary_index, settings, please_stop):
    try:
        cluster = elasticsearch.Cluster(source)
        aliases = cluster.get_aliases()
        # COLLECT THE INDEXES BEHIND THE source.index ALIAS, NEWEST FIRST
        candidates = []
        for pair in aliases:
            if pair.alias == source.index:
                candidates.append(pair.index)
        candidates = jx.sort(candidates, {".": "desc"})

        for index_name in candidates:
            coverage_index = elasticsearch.Index(index=index_name, read_only=False, settings=source)
            push_date_filter = unicode2Date(coverage_index.settings.index[-15:], elasticsearch.INDEX_DATE_FORMAT)

            while not please_stop:
                # IDENTIFY NEW WORK
                Log.note("Working on index {{index}}", index=index_name)
                coverage_index.refresh()

                todo = http.post_json(settings.url, json={
                    "from": "coverage",
                    "groupby": ["source.file.name", "build.revision12"],
                    "where": {"and": [
                        {"missing": "source.method.name"},
                        {"missing": "source.file.min_line_siblings"},
                        {"gte": {"repo.push.date": push_date_filter}}
                    ]},
                    "format": "list",
                    "limit": coalesce(settings.batch_size, 100)
                })

                if not todo.data:
                    break

                queue = Queue("pending source files to review")
                queue.extend(todo.data[0:coalesce(settings.batch_size, 100)])

                threads = [
                    Thread.run(
                        "processor" + unicode(i),
                        process_batch,
                        queue,
                        coverage_index,
                        coverage_summary_index,
                        settings,
                        please_stop=please_stop
                    )
                    for i in range(NUM_THREAD)
                ]

                # ADD STOP MESSAGE
                queue.add(Thread.STOP)

                # WAIT FOR THEM TO COMPLETE
                for t in threads:
                    t.join()

        please_stop.go()
        return

    except Exception as e:
        Log.warning("Problem processing", cause=e)