Example #1
  def __init__(cls, name, bases, env_dict):  # pylint: disable=no-self-argument
    registry.MetaclassRegistry.__init__(cls, name, bases, env_dict)

    has_mixin = False
    for mixin in itervalues(cls.connection_mixins):
      if issubclass(cls, mixin):
        has_mixin = True
        break

    # ApiRegressionTest is a base class, so it doesn't make sense to generate
    # _http_v1/_http_v2 classes for it.
    # Generating classes from already generated classes would lead to infinite
    # recursion. Skipping the generated ones.
    if name == "ApiRegressionTest" or has_mixin:
      return

    for mixin in itervalues(ApiRegressionTestMetaclass.connection_mixins):
      if (mixin.skip_legacy_dynamic_proto_tests and
          getattr(cls, "uses_legacy_dynamic_protos", False)):
        continue

      # Do not generate combinations where the mixin demands relational db reads
      # but the test is aff4 only.
      if (getattr(cls, "aff4_only_test", False) and
          getattr(mixin, "read_from_relational_db", False)):
        continue

      cls_name = "%s_%s" % (name, mixin.connection_type)
      test_cls = compatibility.MakeType(
          cls_name,
          (mixin, cls, test_lib.GRRBaseTest),
          # pylint: disable=protected-access
          {"testForRegression": lambda x: x._testForRegression()})
      module = sys.modules[cls.__module__]
      setattr(module, cls_name, test_cls)
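Every snippet in this listing relies on itervalues (and often iteritems). In these projects that name normally comes from a Python 2/3 compatibility layer such as six or future.utils, where it yields a dict's values without materialising an intermediate list. A minimal sketch, assuming the six import (the names and dict contents below are illustrative, not from the original code):

from six import itervalues, iteritems  # future.utils exposes the same helpers

connection_mixins = {"http_v1": dict, "http_v2": list}

# itervalues() yields the dict's values lazily on both Python 2 and 3,
# which is the looping pattern used throughout these examples.
for mixin in itervalues(connection_mixins):
    print(mixin)

# iteritems() is the matching key/value iterator.
for name, mixin in iteritems(connection_mixins):
    print(name, mixin)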
Example #2
    def prepareSourcestampListForTrigger(self):
        if self.sourceStamps:
            ss_for_trigger = {}
            for ss in self.sourceStamps:
                codebase = ss.get('codebase', '')
                assert codebase not in ss_for_trigger, "codebase specified multiple times"
                ss_for_trigger[codebase] = ss
            return list(itervalues(ss_for_trigger))

        if self.alwaysUseLatest:
            return []

        # start with the sourcestamps from current build
        ss_for_trigger = {}
        objs_from_build = self.build.getAllSourceStamps()
        for ss in objs_from_build:
            ss_for_trigger[ss.codebase] = ss.asDict()

        # overrule revision in sourcestamps with got revision
        if self.updateSourceStamp:
            got = self.getAllGotRevisions()
            for codebase in ss_for_trigger:
                if codebase in got:
                    ss_for_trigger[codebase]['revision'] = got[codebase]

        return list(itervalues(ss_for_trigger))
Example #3
  def ParseFileset(self, fileset=None):
    """Process linux system login files.

    Orchestrates collection of account entries from /etc/passwd and
    /etc/shadow. The passwd and shadow entries are reconciled and group
    memberships are mapped to the account.

    Args:
      fileset: A dict of files mapped from path to an open file.

    Yields:
      - A series of User entries, each of which is populated with
         group memberships and indications of the shadow state of the account.
      - A series of anomalies in cases where there are mismatches between passwd
        and shadow state.
    """
    self.AddPassword(fileset)
    self.AddShadow(fileset)
    self.ReconcileShadow(self.shadow_store)
    # Get group memberships using the files that were already collected.
    # Separate out groups and anomalies.
    for rdf in LinuxSystemGroupParser().ParseFileset(fileset):
      if isinstance(rdf, rdf_client.Group):
        self.groups[rdf.name] = rdf
      else:
        yield rdf
    self.AddGroupMemberships()
    for user in itervalues(self.entry):
      yield user
    for grp in itervalues(self.groups):
      yield grp
    for anom in self.FindAnomalies():
      yield anom
Example #4
File: build.py Project: google/grr
def CreateNewZipWithSignedLibs(z_in,
                               z_out,
                               ignore_files=None,
                               signer=None,
                               skip_signing_files=None):
  """Copies files from one zip to another, signing all qualifying files."""
  ignore_files = ignore_files or []
  skip_signing_files = skip_signing_files or []
  extensions_to_sign = [".sys", ".exe", ".dll", ".pyd"]
  to_sign = []
  for template_file in z_in.namelist():
    if template_file not in ignore_files:
      extension = os.path.splitext(template_file)[1].lower()
      if (signer and template_file not in skip_signing_files and
          extension in extensions_to_sign):
        to_sign.append(template_file)
      else:
        CopyFileInZip(z_in, template_file, z_out)

  temp_files = {}
  for filename in to_sign:
    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, "wb") as temp_fd:
      temp_fd.write(z_in.read(filename))
    temp_files[filename] = path

  try:
    signer.SignFiles(itervalues(temp_files))
  except AttributeError:
    for f in itervalues(temp_files):
      signer.SignFile(f)

  for filename, tempfile_path in iteritems(temp_files):
    z_out.writestr(filename, open(tempfile_path, "rb").read())
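CreateNewZipWithSignedLibs above first tries the signer's batch SignFiles call and falls back to signing file by file when that attribute is missing. A reduced sketch of that fallback pattern, using a hypothetical stand-in signer rather than the real GRR signer API:

from six import itervalues

class SingleFileSigner(object):
    # Hypothetical stand-in that only offers the per-file method.
    def SignFile(self, path):
        print("signing %s" % path)

temp_files = {"grr.exe": "/tmp/tmpsign1", "grr.dll": "/tmp/tmpsign2"}
signer = SingleFileSigner()

try:
    # Batch API, if the signer provides one.
    signer.SignFiles(itervalues(temp_files))
except AttributeError:
    # Otherwise fall back to signing one temporary file at a time.
    for path in itervalues(temp_files):
        signer.SignFile(path)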
Example #5
  def RunXinetdCheck(self,
                     chk_id,
                     svc,
                     disabled,
                     sym,
                     found,
                     xinetd=False,
                     should_detect=True):
    host_data = self.SetKnowledgeBase()
    cfgs = parsers_test_lib.GenXinetd(svc, disabled)
    stats, files = parsers_test_lib.GenTestData(cfgs, itervalues(cfgs))
    data = list(self.parser(stats, files, None))

    # create entries on whether xinetd itself is setup to start or not
    if xinetd:
      cfgs = parsers_test_lib.GenInit("xinetd",
                                      "the extended Internet services daemon")
      stats, files = parsers_test_lib.GenTestData(cfgs, itervalues(cfgs))
      lsb_parser = linux_service_parser.LinuxLSBInitParser()
      data.extend(list(lsb_parser.ParseMultiple(stats, files, None)))

    host_data["LinuxServices"] = self.SetArtifactData(parsed=data)
    results = self.RunChecks(host_data)

    if should_detect:
      self.assertCheckDetectedAnom(chk_id, results, sym, found)
    else:
      self.assertCheckUndetected(chk_id, results)
Example #6
    def _process_changes(self, newRev, branch):
        """
        Read changes since last change.

        - Read list of commit hashes.
        - Extract details from each commit.
        - Add changes to database.
        """

        # initial run, don't parse all history
        if not self.lastRev:
            return
        if newRev in itervalues(self.lastRev):
            # TODO: no new changes on this branch
            # should we just use the lastRev again, but with a different branch?
            pass

        # get the change list
        revListArgs = ([r'--format=%H', r'%s' % newRev] +
                       [r'^%s' % rev for rev in itervalues(self.lastRev)] +
                       [r'--'])
        self.changeCount = 0
        results = yield self._dovccmd('log', revListArgs, path=self.workdir)

        # process oldest change first
        revList = results.split()
        revList.reverse()
        self.changeCount = len(revList)
        self.lastRev[branch] = newRev

        if self.changeCount:
            log.msg('gitpoller: processing %d changes: %s from "%s" branch "%s"'
                    % (self.changeCount, revList, self.repourl, branch))

        for rev in revList:
            dl = defer.DeferredList([
                self._get_commit_timestamp(rev),
                self._get_commit_author(rev),
                self._get_commit_files(rev),
                self._get_commit_comments(rev),
            ], consumeErrors=True)

            results = yield dl

            # check for failures
            failures = [r[1] for r in results if not r[0]]
            if failures:
                # just fail on the first error; they're probably all related!
                raise failures[0]

            timestamp, author, files, comments = [r[1] for r in results]

            yield self.master.data.updates.addChange(
                author=author, revision=ascii2unicode(rev), files=files,
                comments=comments, when_timestamp=timestamp,
                branch=ascii2unicode(self._removeHeads(branch)),
                project=self.project, repository=ascii2unicode(self.repourl),
                category=self.category, src=u'git')
Example #7
def build_training_data(dataset, language, data_augmentation_config,
                        random_state):
    # Create class mapping
    intents = dataset[INTENTS]
    intent_index = 0
    classes_mapping = dict()
    for intent in sorted(intents):
        classes_mapping[intent] = intent_index
        intent_index += 1

    noise_class = intent_index

    # Computing dataset statistics
    nb_utterances = [len(intent[UTTERANCES]) for intent in itervalues(intents)]

    augmented_utterances = []
    utterance_classes = []
    for nb_utterance, intent_name in zip(nb_utterances, intents):
        min_utterances_to_generate = max(
            data_augmentation_config.min_utterances, nb_utterance)
        utterances = augment_utterances(
            dataset, intent_name, language=language,
            min_utterances=min_utterances_to_generate,
            capitalization_ratio=0.0, random_state=random_state)
        augmented_utterances += utterances
        utterance_classes += [classes_mapping[intent_name] for _ in
                              range(len(utterances))]
    augmented_utterances = add_unknown_word_to_utterances(
        augmented_utterances,
        data_augmentation_config.unknown_words_replacement_string,
        data_augmentation_config.unknown_word_prob,
        random_state
    )

    # Adding noise
    noisy_utterances = generate_noise_utterances(
        augmented_utterances, len(intents), data_augmentation_config, language,
        random_state)
    augmented_utterances = [get_text_from_chunks(u[DATA])
                            for u in augmented_utterances]

    augmented_utterances += noisy_utterances
    utterance_classes += [noise_class for _ in noisy_utterances]
    if noisy_utterances:
        classes_mapping[NOISE_NAME] = noise_class

    nb_classes = len(set(itervalues(classes_mapping)))
    intent_mapping = [None for _ in range(nb_classes)]
    for intent, intent_class in iteritems(classes_mapping):
        if intent == NOISE_NAME:
            intent_mapping[intent_class] = None
        else:
            intent_mapping[intent_class] = intent

    return augmented_utterances, np.array(utterance_classes), intent_mapping
Example #8
    def addBuildset(self, waited_for, scheduler=None, sourcestamps=None, reason=u'',
                    properties=None, builderids=None, external_idstring=None,
                    parent_buildid=None, parent_relationship=None,
                    _reactor=reactor):
        if sourcestamps is None:
            sourcestamps = []
        if properties is None:
            properties = {}
        if builderids is None:
            builderids = []
        submitted_at = int(_reactor.seconds())
        bsid, brids = yield self.master.db.buildsets.addBuildset(
            sourcestamps=sourcestamps, reason=reason,
            properties=properties, builderids=builderids,
            waited_for=waited_for, external_idstring=external_idstring,
            submitted_at=epoch2datetime(submitted_at),
            parent_buildid=parent_buildid, parent_relationship=parent_relationship)

        yield BuildRequestCollapser(self.master, list(itervalues(brids))).collapse()

        # get each of the sourcestamps for this buildset (sequentially)
        bsdict = yield self.master.db.buildsets.getBuildset(bsid)
        sourcestamps = []
        for ssid in bsdict['sourcestamps']:
            sourcestamps.append(
                (yield self.master.data.get(('sourcestamps', str(ssid)))).copy()
            )

        # notify about the component build requests
        brResource = self.master.data.getResourceType("buildrequest")
        brResource.generateEvent(list(itervalues(brids)), 'new')

        # and the buildset itself
        msg = dict(
            bsid=bsid,
            external_idstring=external_idstring,
            reason=reason,
            submitted_at=submitted_at,
            complete=False,
            complete_at=None,
            results=None,
            scheduler=scheduler,
            sourcestamps=sourcestamps)
        # TODO: properties=properties)
        self.produceEvent(msg, "new")

        log.msg("added buildset %d to database" % bsid)

        # if there are no builders, then this is done already, so send the
        # appropriate messages for that
        if not builderids:
            yield self.maybeBuildsetComplete(bsid, _reactor=_reactor)

        defer.returnValue((bsid, brids))
Example #9
 def AddGroupMemberships(self):
   """Adds aggregate group membership from group, gshadow and passwd."""
   self.groups = {g.name: self._Members(g) for g in itervalues(self.groups)}
   # Map the groups a user is a member of, irrespective of primary/extra gid.
   for g in itervalues(self.groups):
     for user in g.members:
       membership = self.memberships.setdefault(user, set())
       membership.add(g.gid)
   # Now add the completed membership to the user account.
   for user in itervalues(self.entry):
     user.gids = self.memberships.get(user.username)
Example #10
  def ReadMessageHandlerRequests(self):
    """Reads all message handler requests from the database."""
    res = []
    leases = self.message_handler_leases
    for requests in itervalues(self.message_handler_requests):
      for r in itervalues(requests):
        res.append(r.Copy())
        existing_lease = leases.get(r.handler_name, {}).get(r.request_id, None)
        res[-1].leased_until = existing_lease

    return sorted(res, key=lambda r: -1 * r.timestamp)
Example #11
        def percentIndexOverlap(attr, results):
            """
            Returns what fraction of index values are common to all NDFrames in results,
            compared to the NDFrame with the most rows/columns.

            Pass "index" or "columns" as ``attr`` to specify rows or columns.
            """
            getter = operator.attrgetter(attr)

            longest = max(len(getter(df)) for df in itervalues(results))
            columnsIter = (getter(df) for df in itervalues(results))
            mutualColumns = functools.reduce(lambda colA, colB: colA.intersection(colB), columnsIter)
            return len(mutualColumns)/longest
Example #12
  def ReadClientMessages(self, client_id):
    """Reads all client messages available for a given client_id."""
    res = []
    for msgs_by_id in itervalues(self.client_messages):
      for orig_msg in itervalues(msgs_by_id):
        if db_utils.ClientIdFromGrrMessage(orig_msg) != client_id:
          continue
        msg = orig_msg.Copy()
        current_lease = self.client_message_leases.get(msg.task_id)
        if current_lease:
          msg.leased_until, msg.leased_by = current_lease
        res.append(msg)

    return res
Example #13
    def canStartBuild(self):
        """
        I am called when a build is requested to see if this worker
        can start a build.  This function can be used to limit overall
        concurrency on the worker.

        Note for subclassers: if a worker can become willing to start a build
        without any action on that worker (for example, by a resource in use on
        another worker becoming available), then you must arrange for
        L{maybeStartBuildsForWorker} to be called at that time, or builds on
        this worker will not start.
        """

        if self.worker_status.isPaused():
            return False

        # If we're waiting to shutdown gracefully, then we shouldn't
        # accept any new jobs.
        if self.worker_status.getGraceful():
            return False

        if self.max_builds:
            active_builders = [sb for sb in itervalues(self.workerforbuilders)
                               if sb.isBusy()]
            if len(active_builders) >= self.max_builds:
                return False

        if not self.locksAvailable():
            return False

        return True
Example #14
    def mergeSourceStampsWith(self, others):
        """ Returns one merged sourcestamp for every codebase """
        # get all codebases from all requests
        all_codebases = set(self.sources)
        for other in others:
            all_codebases |= set(other.sources)

        all_merged_sources = {}
        # walk along the codebases
        for codebase in all_codebases:
            all_sources = []
            if codebase in self.sources:
                all_sources.append(self.sources[codebase])
            for other in others:
                if codebase in other.sources:
                    all_sources.append(other.sources[codebase])
            assert len(
                all_sources) > 0, "each codebase should have at least one sourcestamp"

            # TODO: select the sourcestamp that best represents the merge,
            # preferably the latest one.  This used to be accomplished by
            # looking at changeids and picking the highest-numbered.
            all_merged_sources[codebase] = all_sources[-1]

        return list(itervalues(all_merged_sources))
Example #15
 def checkSummary(self, got, step, build=None):
     self.failUnless(all(isinstance(k, unicode) for k in got))
     self.failUnless(all(isinstance(k, unicode) for k in itervalues(got)))
     exp = {u'step': step}
     if build:
         exp[u'build'] = build
     self.assertEqual(got, exp)
Example #16
        def old_way():
            d = None
            for b in itervalues(self.buildslave.slavebuilders):
                if b.remote:
                    d = b.mind.callRemote("shutdown")
                    break

            if d:
                name = self.buildslave.slavename
                log.msg("Shutting down (old) slave: %s" % name)
                # The remote shutdown call will not complete successfully since
                # the buildbot process exits almost immediately after getting
                # the shutdown request.
                # Here we look at the reason why the remote call failed, and if
                # it's because the connection was lost, that means the slave
                # shutdown as expected.

                @d.addErrback
                def _errback(why):
                    if why.check(pb.PBConnectionLost):
                        log.msg("Lost connection to %s" % name)
                    else:
                        log.err("Unexpected error when trying to shutdown %s"
                                % name)
                return d
            log.err("Couldn't find remote builder to shut down slave")
            return defer.succeed(None)
Example #17
 def stopConsuming(self, key=None):
     if key is not None:
         self.qrefs[key].stopConsuming()
     else:
         for qref in itervalues(self.qrefs):
             qref.stopConsuming()
         self.qrefs = {}
Example #18
 def checkSummary(self, got, step, build=None):
     self.assertTrue(all(isinstance(k, text_type) for k in got))
     self.assertTrue(all(isinstance(k, text_type) for k in itervalues(got)))
     exp = {u'step': step}
     if build:
         exp[u'build'] = build
     self.assertEqual(got, exp)
Example #19
 def findChangeSourceId(self, name):
     validation.verifyType(self.testcase, 'changesource name', name,
                           validation.StringValidator())
     if name not in self.changesourceIds:
         self.changesourceIds[name] = max(
             [0] + list(itervalues(self.changesourceIds))) + 1
     return defer.succeed(self.changesourceIds[name])
Example #20
 def activate(self):
     yield NightlyBase.activate(self)
     lastTrigger = yield self.getState("lastTrigger", None)
     self._lastTrigger = None
     if lastTrigger:
         try:
             if isinstance(lastTrigger[0], list):
                 self._lastTrigger = (
                     lastTrigger[0],
                     properties.Properties.fromDict(lastTrigger[1]),
                     lastTrigger[2],
                     lastTrigger[3],
                 )
             # handle state from before Buildbot-0.9.0
             elif isinstance(lastTrigger[0], dict):
                 self._lastTrigger = (
                     list(itervalues(lastTrigger[0])),
                     properties.Properties.fromDict(lastTrigger[1]),
                     None,
                     None,
                 )
         except Exception:
             pass
         # If the lastTrigger isn't of the right format, ignore it
         if not self._lastTrigger:
             log.msg(
                 format="NightlyTriggerable Scheduler <%(scheduler)s>: "
                 "could not load previous state; starting fresh",
                 scheduler=self.name,
             )
Example #21
    def check_single_master(self):
        # check additional problems that are only valid in a single-master
        # installation
        if self.multiMaster:
            return

        if not self.workers:
            error("no workers are configured")

        if not self.builders:
            error("no builders are configured")

        # check that all builders are implemented on this master
        unscheduled_buildernames = set([b.name for b in self.builders])
        for s in itervalues(self.schedulers):
            builderNames = s.listBuilderNames()
            if interfaces.IRenderable.providedBy(builderNames):
                unscheduled_buildernames.clear()
            else:
                for n in builderNames:
                    if interfaces.IRenderable.providedBy(n):
                        unscheduled_buildernames.clear()
                    elif n in unscheduled_buildernames:
                        unscheduled_buildernames.remove(n)
        if unscheduled_buildernames:
            error("builder(s) %s have no schedulers to drive them"
                  % (', '.join(unscheduled_buildernames),))
Example #22
 def forget_about_it(self, name):
     validation.verifyType(self.testcase, 'scheduler name', name,
                           validation.StringValidator())
     if name not in self.schedulerIds:
         self.schedulerIds[name] = max(
             [0] + list(itervalues(self.schedulerIds))) + 1
     return defer.succeed(self.schedulerIds[name])
Example #23
 def test_api_collection_filter_and_order_desc(self):
     yield self.render_resource(self.rsrc, '/test?field=info&order=-info')
     self.assertRestCollection(typeName='tests',
                               items=sorted(list([{'info': v['info']}
                                                  for v in itervalues(endpoint.testData)]),
                                            key=lambda v: v['info'], reverse=True),
                               total=8, orderSignificant=True)
Example #24
 def test_api_collection_filter_pagination(self):
     yield self.render_resource(self.rsrc, '/test?success=false&limit=2')
     # note that the limit/offset and total are *after* the filter
     self.assertRestCollection(typeName='tests',
                               items=sorted([v for v in itervalues(endpoint.testData)
                                             if not v['success']], key=lambda v: v['id'])[:2],
                               total=3)
Example #25
  def Lookup(self,
             keywords,
             start_time=FIRST_TIMESTAMP,
             end_time=LAST_TIMESTAMP,
             last_seen_map=None):
    """Finds objects associated with keywords.

    Find the names related to all keywords.

    Args:
      keywords: A collection of keywords that we are interested in.
      start_time: Only considers keywords added at or after this point in time.
      end_time: Only considers keywords at or before this point in time.
      last_seen_map: If present, is treated as a dict and populated to map pairs
        (keyword, name) to the timestamp of the latest connection found.
    Returns:
      A set of potentially relevant names.

    """
    posting_lists = self.ReadPostingLists(
        keywords,
        start_time=start_time,
        end_time=end_time,
        last_seen_map=last_seen_map)

    results = list(itervalues(posting_lists))
    relevant_set = results[0]

    for hits in results:
      relevant_set &= hits

      if not relevant_set:
        return relevant_set

    return relevant_set
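Lookup above intersects the posting list of every keyword and stops as soon as the running set becomes empty. A standalone sketch of that intersection pattern on hypothetical data (not the real GRR index):

from six import itervalues

posting_lists = {
    "linux": {"host-1", "host-2", "host-3"},
    "x86_64": {"host-2", "host-3"},
}

results = list(itervalues(posting_lists))
relevant_set = results[0]
for hits in results:
    relevant_set &= hits      # keep only names that match every keyword
    if not relevant_set:
        break                 # nothing can match any more; stop early
print(relevant_set)           # {'host-2', 'host-3'} (set order may vary)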
Example #26
 def ReadChildFlowObjects(self, client_id, flow_id):
   """Reads flows that were started by a given flow from the database."""
   res = []
   for flow in itervalues(self.flows):
     if flow.client_id == client_id and flow.parent_flow_id == flow_id:
       res.append(flow)
   return res
Example #27
def collection_view_listing_db(context, request):
    result = {}

    frame = request.params.get('frame', 'columns')

    limit = request.params.get('limit', 25)
    if limit in ('', 'all'):
        limit = None
    if limit is not None:
        try:
            limit = int(limit)
        except ValueError:
            limit = 25

    items = (
        item for item in itervalues(context)
        if request.has_permission('view', item)
    )

    if limit is not None:
        items = islice(items, limit)

    result['@graph'] = [
        request.embed(request.resource_path(item, '@@' + frame))
        for item in items
    ]

    if limit is not None and len(result['@graph']) == limit:
        params = [(k, v) for k, v in request.params.items() if k != 'limit']
        params.append(('limit', 'all'))
        result['all'] = '%s?%s' % (request.resource_path(context), urlencode(params))

    return result
Example #28
  def Handle(self, unused_args, token=None):
    router_methods = self.router.__class__.GetAnnotatedMethods()

    result = ApiListApiMethodsResult()
    for router_method in itervalues(router_methods):
      api_method = ApiMethod(
          name=router_method.name,
          category=router_method.category,
          doc=router_method.doc,
          http_route=router_method.http_methods[-1][1],
          http_methods=[router_method.http_methods[-1][0]])

      if router_method.args_type:
        api_method.args_type_descriptor = (
            api_value_renderers.BuildTypeDescriptor(router_method.args_type))

      if router_method.result_type:
        if router_method.result_type == router_method.BINARY_STREAM_RESULT_TYPE:
          api_method.result_kind = api_method.ResultKind.BINARY_STREAM
        else:
          api_method.result_kind = api_method.ResultKind.VALUE
          api_method.result_type_descriptor = (
              api_value_renderers.BuildTypeDescriptor(router_method.result_type)
          )
      else:
        api_method.result_kind = api_method.ResultKind.NONE

      result.items.append(api_method)

    return result
Example #29
        def check(result):
            # finish up the debounced updateSummary before checking
            self.debounceClock.advance(1)
            self.assertEqual(self.expected_remote_commands, [],
                             "assert all expected commands were run")

            # in case of unexpected result, display logs in stdout for debugging failing tests
            if result != self.exp_result:
                for loog in itervalues(self.step.logs):
                    print(loog.stdout)
                    print(loog.stderr)

            self.assertEqual(result, self.exp_result, "expected result")
            if self.exp_state_string:
                stepStateString = self.master.data.updates.stepStateString
                stepids = list(stepStateString)
                assert stepids, "no step state strings were set"
                self.assertEqual(stepStateString[stepids[0]],
                                 self.exp_state_string,
                                 "expected step state strings")
            for pn, (pv, ps) in iteritems(self.exp_properties):
                self.assertTrue(self.properties.hasProperty(pn),
                                "missing property '%s'" % pn)
                self.assertEqual(self.properties.getProperty(pn),
                                 pv, "property '%s'" % pn)
                if ps is not None:
                    self.assertEqual(
                        self.properties.getPropertySource(pn), ps, "property '%s' source" % pn)
            for pn in self.exp_missing_properties:
                self.assertFalse(self.properties.hasProperty(pn),
                                 "unexpected property '%s'" % pn)
            for l, contents in iteritems(self.exp_logfiles):
                self.assertEqual(
                    self.step.logs[l].stdout, contents, "log '%s' contents" % l)
Example #30
  def ParseMultiple(self, stats, knowledge_base):
    """Parse each returned registry value."""
    user_dict = {}

    for stat in stats:
      sid_str = stat.pathspec.path.split("/", 3)[2]
      if SID_RE.match(sid_str):
        if sid_str not in user_dict:
          user_dict[sid_str] = rdf_client.User(sid=sid_str)

        if stat.registry_data.GetValue():
          # Look up in the mapping if we can use this entry to populate a user
          # attribute, and if so, set it.
          reg_key_name = stat.pathspec.Dirname().Basename()
          if reg_key_name in self.key_var_mapping:
            map_dict = self.key_var_mapping[reg_key_name]
            reg_key = stat.pathspec.Basename()
            kb_attr = map_dict.get(reg_key)
            if kb_attr:
              value = artifact_utils.ExpandWindowsEnvironmentVariables(
                  stat.registry_data.GetValue(), knowledge_base)
              value = artifact_utils.ExpandWindowsUserEnvironmentVariables(
                  value, knowledge_base, sid=sid_str)
              user_dict[sid_str].Set(kb_attr, value)

    # Now yield each user we found.
    return itervalues(user_dict)
Example #31
  def _LegacyCheckHashesWithFileStore(self):
    """Check all queued up hashes for existence in file store (legacy).

    Hashes which do not exist in the file store will be downloaded. This
    function flushes the entire queue (self.state.pending_hashes) in order to
    minimize the round trips to the file store.

    If a file was found in the file store it is copied from there into the
    client's VFS namespace. Otherwise, we request the client to hash every block
    in the file, and add it to the file tracking queue
    (self.state.pending_files).
    """
    if not self.state.pending_hashes:
      return

    # This map represents all the hashes in the pending urns.
    file_hashes = {}

    # Store a mapping of hash to tracker. Keys are hashdigest objects,
    # values are arrays of tracker dicts.
    hash_to_tracker = {}
    for index, tracker in iteritems(self.state.pending_hashes):

      # We might not have gotten this hash yet
      if tracker.get("hash_obj") is None:
        continue

      hash_obj = tracker["hash_obj"]
      digest = hash_obj.sha256
      file_hashes[index] = hash_obj
      hash_to_tracker.setdefault(digest, []).append(tracker)

    # First we get all the files which are present in the file store.
    files_in_filestore = {}

    # TODO(amoser): This object never changes, could this be a class attribute?
    filestore_obj = aff4.FACTORY.Open(
        legacy_filestore.FileStore.PATH,
        legacy_filestore.FileStore,
        mode="r",
        token=self.token)

    for file_store_urn, hash_obj in filestore_obj.CheckHashes(
        itervalues(file_hashes), external=self.state.use_external_stores):

      # Since checkhashes only returns one digest per unique hash we need to
      # find any other files pending download with the same hash.
      for tracker in hash_to_tracker[hash_obj.sha256]:
        self.state.files_skipped += 1
        file_hashes.pop(tracker["index"])
        files_in_filestore[file_store_urn] = hash_obj
        # Remove this tracker from the pending_hashes store since we no longer
        # need to process it.
        self.state.pending_hashes.pop(tracker["index"])

    # Now that the check is done, reset our counter
    self.state.files_hashed_since_check = 0
    # Now copy all existing files to the client aff4 space.
    for filestore_file_urn, hash_obj in iteritems(files_in_filestore):

      for file_tracker in hash_to_tracker.get(hash_obj.sha256, []):
        stat_entry = file_tracker["stat_entry"]
        # Copy the existing file from the filestore to the client namespace.
        target_urn = stat_entry.pathspec.AFF4Path(self.client_urn)

        aff4.FACTORY.Copy(
            filestore_file_urn, target_urn, update_timestamps=True)

        with aff4.FACTORY.Open(
            target_urn, mode="rw", token=self.token) as new_fd:
          new_fd.Set(new_fd.Schema.STAT, stat_entry)
          # Due to potential filestore corruption, the existing files
          # can have 0 size.
          if new_fd.size == 0:
            new_fd.size = (file_tracker["bytes_read"] or stat_entry.st_size)

        if data_store.RelationalDBWriteEnabled():
          path_info = rdf_objects.PathInfo.FromStatEntry(stat_entry)
          path_info.hash_entry = hash_obj
          data_store.REL_DB.WritePathInfos(self.client_id, [path_info])

        # Add this file to the filestore index.
        filestore_obj.AddURNToIndex(str(hash_obj.sha256), target_urn)

        # Report this hit to the flow's caller.
        self._ReceiveFetchedFile(file_tracker)

    # Now we iterate over all the files which are not in the store and arrange
    # for them to be copied.
    for index in file_hashes:

      # Move the tracker from the pending hashes store to the pending files
      # store - it will now be downloaded.
      file_tracker = self.state.pending_hashes.pop(index)
      self.state.pending_files[index] = file_tracker

      # If we already know how big the file is we use that, otherwise fall back
      # to the size reported by stat.
      if file_tracker["bytes_read"] > 0:
        file_tracker["size_to_download"] = file_tracker["bytes_read"]
      else:
        file_tracker["size_to_download"] = file_tracker["stat_entry"].st_size

      # We do not have the file here yet - we need to retrieve it.
      expected_number_of_hashes = (
          file_tracker["size_to_download"] // self.CHUNK_SIZE + 1)

      # We just hash ALL the chunks in the file now. NOTE: This maximizes client
      # VFS cache hit rate and is far more efficient than launching multiple
      # GetFile flows.
      self.state.files_to_fetch += 1

      for i in range(expected_number_of_hashes):
        if i == expected_number_of_hashes - 1:
          # The last chunk is short.
          length = file_tracker["size_to_download"] % self.CHUNK_SIZE
        else:
          length = self.CHUNK_SIZE
        self.CallClient(
            server_stubs.HashBuffer,
            pathspec=file_tracker["stat_entry"].pathspec,
            offset=i * self.CHUNK_SIZE,
            length=length,
            next_state="CheckHash",
            request_data=dict(index=index))

    if self.state.files_hashed % 100 == 0:
      self.Log("Hashed %d files, skipped %s already stored.",
               self.state.files_hashed, self.state.files_skipped)
Example #32
 def test_api_collection_list_filter(self):
     yield self.render_resource(self.rsrc, b'/test?tags__contains=a')
     self.assertRestCollection(typeName='tests',
                               items=[v for v in itervalues(endpoint.testData)
                                      if 'a' in v['tags']],
                               total=2)
Example #33
 def test_api_collection_operator_filter(self):
     yield self.render_resource(self.rsrc, b'/test?info__lt=skipped')
     self.assertRestCollection(typeName='tests',
                               items=[v for v in itervalues(endpoint.testData)
                                      if v['info'] < 'skipped'],
                               total=4)
Example #34
 def test_api_collection_simple_filter(self):
     yield self.render_resource(self.rsrc, b'/test?success=yes')
     self.assertRestCollection(
         typeName='tests',
         items=[v for v in itervalues(endpoint.testData) if v['success']],
         total=5)
Example #35
    def run(self):
        schedulers_and_props = yield self.getSchedulersAndProperties()

        schedulers_and_props_list = []

        # To be back compatible we need to differ between old and new style
        # schedulers_and_props can either consist of 2 elements tuple or
        # dictionary
        for element in schedulers_and_props:
            if isinstance(element, dict):
                schedulers_and_props_list = schedulers_and_props
                break
            else:
                # Old-style back compatibility: Convert tuple to dict and make
                # it important
                d = {
                    'sched_name': element[0],
                    'props_to_set': element[1],
                    'unimportant': False
                }
                schedulers_and_props_list.append(d)

        # post process the schedulernames, and raw properties
        # we do this out of the loop, as this can result in errors
        schedulers_and_props = [
            (self.getSchedulerByName(entry_dict['sched_name']),
             self.createTriggerProperties(entry_dict['props_to_set']),
             entry_dict['unimportant'])
            for entry_dict in schedulers_and_props_list
        ]

        ss_for_trigger = self.prepareSourcestampListForTrigger()

        dl = []
        triggeredNames = []
        results = SUCCESS
        self.running = True

        unimportant_brids = []

        for sch, props_to_set, unimportant in schedulers_and_props:
            idsDeferred, resultsDeferred = sch.trigger(
                waited_for=self.waitForFinish,
                sourcestamps=ss_for_trigger,
                set_props=props_to_set,
                parent_buildid=self.build.buildid,
                parent_relationship=self.parent_relationship)
            # we are not in a hurry of starting all in parallel and managing
            # the deferred lists, just let the db writes be serial.
            brids = {}
            try:
                bsid, brids = yield idsDeferred
            except Exception as e:
                yield self.addLogWithException(e)
                results = EXCEPTION
            if unimportant:
                unimportant_brids.extend(itervalues(brids))
            self.brids.extend(itervalues(brids))
            for brid in brids.values():
                # put the url to the brids, so that we can have the status from
                # the beginning
                url = self.master.status.getURLForBuildrequest(brid)
                yield self.addURL("%s #%d" % (sch.name, brid), url)
            dl.append(resultsDeferred)
            triggeredNames.append(sch.name)
            if self.ended:
                defer.returnValue(CANCELLED)
        self.triggeredNames = triggeredNames

        if self.waitForFinish:
            self.waitForFinishDeferred = defer.DeferredList(dl,
                                                            consumeErrors=1)
            try:
                rclist = yield self.waitForFinishDeferred
            except defer.CancelledError:
                pass
            # we were interrupted, don't bother update status
            if self.ended:
                defer.returnValue(CANCELLED)
            yield self.addBuildUrls(rclist)
            results = yield self.worstStatus(results, rclist,
                                             unimportant_brids)
        else:
            # do something to handle errors
            for d in dl:
                d.addErrback(
                    log.err,
                    '(ignored) while invoking Triggerable schedulers:')

        defer.returnValue(results)
Example #36
  def _CheckHashesWithFileStore(self):
    """Check all queued up hashes for existence in file store.

    Hashes which do not exist in the file store will be downloaded. This
    function flushes the entire queue (self.state.pending_hashes) in order to
    minimize the round trips to the file store.

    If a file was found in the file store it is not scheduled for collection
    and its PathInfo is written to the datastore pointing to the file store's
    hash. Otherwise, we request the client to hash every block in the file,
    and add it to the file tracking queue (self.state.pending_files).
    """
    if not data_store.RelationalDBReadEnabled(category="filestore"):
      return self._LegacyCheckHashesWithFileStore()

    if not self.state.pending_hashes:
      return

    # This map represents all the hashes in the pending urns.
    file_hashes = {}

    # Store a mapping of hash to tracker. Keys are hashdigest objects,
    # values are arrays of tracker dicts.
    hash_to_tracker = {}
    for index, tracker in iteritems(self.state.pending_hashes):

      # We might not have gotten this hash yet
      if tracker.get("hash_obj") is None:
        continue

      hash_obj = tracker["hash_obj"]
      digest = hash_obj.sha256
      file_hashes[index] = hash_obj
      hash_to_tracker.setdefault(rdf_objects.SHA256HashID(digest),
                                 []).append(tracker)

    # First we get all the files which are present in the file store.
    files_in_filestore = set()

    statuses = file_store.CheckHashes([
        rdf_objects.SHA256HashID.FromBytes(ho.sha256.AsBytes())
        for ho in itervalues(file_hashes)
    ])
    for hash_id, status in iteritems(statuses):
      self.HeartBeat()

      if not status:
        continue

      # Since checkhashes only returns one digest per unique hash we need to
      # find any other files pending download with the same hash.
      for tracker in hash_to_tracker[hash_id]:
        self.state.files_skipped += 1
        file_hashes.pop(tracker["index"])
        files_in_filestore.add(hash_id)
        # Remove this tracker from the pending_hashes store since we no longer
        # need to process it.
        self.state.pending_hashes.pop(tracker["index"])

    # Now that the check is done, reset our counter
    self.state.files_hashed_since_check = 0
    # Now copy all existing files to the client aff4 space.
    for hash_id in files_in_filestore:

      for file_tracker in hash_to_tracker.get(hash_id, []):
        stat_entry = file_tracker["stat_entry"]
        path_info = rdf_objects.PathInfo.FromStatEntry(stat_entry)
        path_info.hash_entry = hash_obj
        data_store.REL_DB.WritePathInfos(self.client_id, [path_info])

        # Report this hit to the flow's caller.
        self._ReceiveFetchedFile(file_tracker)

    # Now we iterate over all the files which are not in the store and arrange
    # for them to be copied.
    for index in file_hashes:

      # Move the tracker from the pending hashes store to the pending files
      # store - it will now be downloaded.
      file_tracker = self.state.pending_hashes.pop(index)
      self.state.pending_files[index] = file_tracker

      # If we already know how big the file is we use that, otherwise fall back
      # to the size reported by stat.
      if file_tracker["bytes_read"] > 0:
        file_tracker["size_to_download"] = file_tracker["bytes_read"]
      else:
        file_tracker["size_to_download"] = file_tracker["stat_entry"].st_size

      # We do not have the file here yet - we need to retrieve it.
      expected_number_of_hashes = (
          file_tracker["size_to_download"] // self.CHUNK_SIZE + 1)

      # We just hash ALL the chunks in the file now. NOTE: This maximizes client
      # VFS cache hit rate and is far more efficient than launching multiple
      # GetFile flows.
      self.state.files_to_fetch += 1

      for i in range(expected_number_of_hashes):
        if i == expected_number_of_hashes - 1:
          # The last chunk is short.
          length = file_tracker["size_to_download"] % self.CHUNK_SIZE
        else:
          length = self.CHUNK_SIZE

        self.CallClient(
            server_stubs.HashBuffer,
            pathspec=file_tracker["stat_entry"].pathspec,
            offset=i * self.CHUNK_SIZE,
            length=length,
            next_state="CheckHash",
            request_data=dict(index=index))

    if self.state.files_hashed % 100 == 0:
      self.Log("Hashed %d files, skipped %s already stored.",
               self.state.files_hashed, self.state.files_skipped)
Example #37
 def test_api_collection_fields(self):
     yield self.render_resource(self.rsrc, b'/test?field=success&field=info')
     self.assertRestCollection(typeName='tests',
                               items=[{'success': v['success'], 'info': v['info']}
                                      for v in itervalues(endpoint.testData)],
                               total=8)
Example #38
 def added(self):
     return itervalues(self._added)
Example #39
 def ReadFlowProcessingRequests(self):
     """Reads all flow processing requests from the database."""
     return list(itervalues(self.flow_processing_requests))
Example #40
def _get_videoids(videos):
    """Return a list of VideoId s for the videos"""
    return [
        common.VideoId.from_videolist_item(video)
        for video in itervalues(videos)
    ]
Example #41
def _get_titles(videos):
    """Return a list of videos' titles"""
    return [
        _get_title(video) for video in itervalues(videos) if _get_title(video)
    ]
Example #42
    def _process_changes(self, newRev, branch):
        """
        Read changes since last change.

        - Read list of commit hashes.
        - Extract details from each commit.
        - Add changes to database.
        """

        # initial run, don't parse all history
        if not self.lastRev:
            return
        if newRev in itervalues(self.lastRev):
            # TODO: no new changes on this branch
            # should we just use the lastRev again, but with a different branch?
            pass

        # get the change list
        revListArgs = ([r'--format=%H', r'%s' % newRev] + [
            r'^%s' % rev.encode('ascii', 'ignore')
            for rev in itervalues(self.lastRev)
        ] + [r'--'])
        self.changeCount = 0
        results = yield self._dovccmd('log', revListArgs, path=self.workdir)

        # process oldest change first
        revList = results.split()
        revList.reverse()
        self.changeCount = len(revList)
        self.lastRev[branch] = newRev

        if self.changeCount:
            log.msg(
                'gitpoller: processing %d changes: %s from "%s" branch "%s"' %
                (self.changeCount, revList, self.repourl, branch))

        for rev in revList:
            dl = defer.DeferredList([
                self._get_commit_timestamp(rev),
                self._get_commit_author(rev),
                self._get_commit_files(rev),
                self._get_commit_comments(rev),
            ],
                                    consumeErrors=True)

            results = yield dl

            # check for failures
            failures = [r[1] for r in results if not r[0]]
            if failures:
                # just fail on the first error; they're probably all related!
                raise failures[0]

            timestamp, author, files, comments = [r[1] for r in results]

            yield self.master.data.updates.addChange(
                author=author,
                revision=ascii2unicode(rev),
                files=files,
                comments=comments,
                when_timestamp=timestamp,
                branch=ascii2unicode(self._removeHeads(branch)),
                project=self.project,
                repository=ascii2unicode(self.repourl),
                category=self.category,
                src=u'git')
Example #43
    def infer_spikes(self,
                     channel=0,
                     label=None,
                     gamma=None,
                     share_gamma=True,
                     mode='correct',
                     verbose=False):
        """Infer the most likely discretized spike train underlying a
        fluorescence trace.

        Parameters
        ----------
        channel : string or int, optional
            The channel to be used for spike inference.
        label : string or None, optional
            Text string indicating the signals from which spikes should be
            inferred. Defaults to the most recently extracted signals.
        gamma : float, optional
            Gamma is 1 - timestep/tau, where tau is the time constant of the
            AR(1) process.  If no value is given, then gamma is estimated from
            the data.
        share_gamma : bool, optional
            Whether to apply the same gamma estimate to all ROIs. Defaults to
            True.
        mode : {'correct', 'robust', 'psd'}, optional
            The method for estimating sigma. The 'robust' method overestimates
            the noise by assuming that gamma = 1. The 'psd' method estimates
            sigma from the PSD of the fluorescence data. Default: 'correct'.
        verbose : bool, optional
            Whether to print status updates. Default: False.

        Returns
        -------
        spikes : ndarray of float
            The inferred normalized spike count at each time-bin.  Values are
            normalized to the maximum value over all time-bins.
            Shape: (num_rois, num_timebins).
        fits : ndarray of float
            The inferred denoised fluorescence signal at each time-bin.
            Shape: (num_rois, num_timebins).
        parameters : dict of (str, ndarray of float)
            Dictionary with values for 'sigma', 'gamma', and 'baseline'.

        Notes
        -----
        We strongly recommend installing MOSEK (www.mosek.com; free for
        academic use) which greatly speeds up the inference.

        References
        ----------
        * Pnevmatikakis et al. 2015. Submitted (arXiv:1409.2903).
        * Machado et al. 2015. Submitted.
        * Vogelstein et al. 2010. Journal of Neurophysiology. 104(6):
          3691-3704.

        """

        channel = self._resolve_channel(channel)

        import sima.spikes
        all_signals = self.signals(channel)
        if label is None:
            label = most_recent_key(all_signals)
        signals = all_signals[label]

        # estimate gamma for all cells
        if mode == "psd":
            if share_gamma:
                mega_trace = np.concatenate(
                    [sigs for sigs in signals['raw'][0]])
                sigma = sima.spikes.estimate_sigma(mega_trace)
                gamma = sima.spikes.estimate_gamma(mega_trace, sigma)
                sigma = [sigma for _ in signals['raw'][0]]
                gamma = [gamma for _ in signals['raw'][0]]
            else:
                sigma = [
                    sima.spikes.estimate_sigma(sigs[0])
                    for sigs in signals['raw'][0]
                ]
                gamma = [
                    sima.spikes.estimate_gamma(sigs[0], sigm)
                    for sigm, sigs in zip(sigma, signals['raw'][0])
                ]
        else:
            gamma = [
                sima.spikes.estimate_parameters(sigs, gamma, sigma=0)[0]
                for sigs in zip(*signals['raw'])
            ]
            if share_gamma:
                gamma = np.median(gamma)

            # ensure that gamma is a list, one value per ROI
            if isinstance(gamma, float):
                gamma = [gamma for _ in signals['raw'][0]]

            # estimate sigma values
            sigma = [
                sima.spikes.estimate_parameters(sigs, g)[1]
                for g, sigs in zip(gamma, zip(*signals['raw']))
            ]

        # perform spike inference
        spikes, fits, parameters = [], [], []
        for seq_idx, seq_signals in enumerate(signals['raw']):
            spikes.append(np.zeros_like(seq_signals))
            fits.append(np.zeros_like(seq_signals))
            parameters.append(collections.defaultdict(list))
            for i, trace in enumerate(seq_signals):
                spikes[-1][i], fits[-1][i], p = sima.spikes.spike_inference(
                    trace, sigma[i], gamma[i], mode, verbose)
                for k, v in iteritems(p):
                    parameters[-1][k].append(v)
            for v in itervalues(parameters[-1]):
                assert len(v) == len(spikes[-1])
            parameters[-1] = dict(parameters[-1])

        if self.savedir:
            signals['spikes'] = spikes
            signals['spikes_fits'] = fits
            signals['spikes_params'] = parameters
            all_signals[label] = signals

            signals_filename = os.path.join(
                self.savedir,
                'signals_{}.pkl'.format(signals['signal_channel']))

            pickle.dump(all_signals, open(signals_filename, 'wb'),
                        pickle.HIGHEST_PROTOCOL)

        return spikes, fits, parameters
Example #44
 def itervalues(self):
     return itervalues(self.dict)
Example #45
def _validate_and_format_custom_entity(entity, utterance_entities, language,
                                       builtin_entity_parser):
    validate_type(entity, dict, object_label="entity")

    # TODO: this is here temporarily, only to allow backward compatibility
    if MATCHING_STRICTNESS not in entity:
        strictness = entity.get("parser_threshold", 1.0)

        entity[MATCHING_STRICTNESS] = strictness

    mandatory_keys = [
        USE_SYNONYMS, AUTOMATICALLY_EXTENSIBLE, DATA, MATCHING_STRICTNESS
    ]
    validate_keys(entity, mandatory_keys, object_label="custom entity")
    validate_type(entity[USE_SYNONYMS], bool, object_label="use_synonyms")
    validate_type(entity[AUTOMATICALLY_EXTENSIBLE],
                  bool,
                  object_label="automatically_extensible")
    validate_type(entity[DATA], list, object_label="entity data")
    validate_type(entity[MATCHING_STRICTNESS], (float, int),
                  object_label="matching_strictness")

    formatted_entity = dict()
    formatted_entity[AUTOMATICALLY_EXTENSIBLE] = entity[
        AUTOMATICALLY_EXTENSIBLE]
    formatted_entity[MATCHING_STRICTNESS] = entity[MATCHING_STRICTNESS]
    use_synonyms = entity[USE_SYNONYMS]

    # Validate format and filter out unused data
    valid_entity_data = []
    for entry in entity[DATA]:
        validate_type(entry, dict, object_label="entity entry")
        validate_keys(entry, [VALUE, SYNONYMS], object_label="entity entry")
        entry[VALUE] = entry[VALUE].strip()
        if not entry[VALUE]:
            continue
        validate_type(entry[SYNONYMS], list, object_label="entity synonyms")
        entry[SYNONYMS] = [s.strip() for s in entry[SYNONYMS] if s.strip()]
        valid_entity_data.append(entry)
    entity[DATA] = valid_entity_data

    # Compute capitalization before normalizing
    # Normalization lowercases text and hence leads to bad capitalization calculation
    formatted_entity[CAPITALIZE] = _has_any_capitalization(
        utterance_entities, language)

    validated_utterances = dict()
    # Map original values and synonyms
    for data in entity[DATA]:
        ent_value = data[VALUE]
        validated_utterances[ent_value] = ent_value
        if use_synonyms:
            for s in data[SYNONYMS]:
                if s not in validated_utterances:
                    validated_utterances[s] = ent_value

    # Number variations in entities values are expensive since each entity
    # value is parsed with the builtin entity parser before creating the
    # variations. We avoid generating these variations if there are enough
    # entity values.

    # Add variations if not colliding
    all_original_values = _extract_entity_values(entity)
    if len(entity[DATA]) < VARIATIONS_GENERATION_THRESHOLD:
        variations_args = {"case": True, "and_": True, "punctuation": True}
    else:
        variations_args = {"case": False, "and_": False, "punctuation": False}

    variations_args["numbers"] = len(
        entity[DATA]) < NUMBER_VARIATIONS_THRESHOLD

    variations = dict()
    for data in entity[DATA]:
        ent_value = data[VALUE]
        values_to_variate = {ent_value}
        if use_synonyms:
            values_to_variate.update(set(data[SYNONYMS]))
        variations[ent_value] = set(
            v for value in values_to_variate for v in get_string_variations(
                value, language, builtin_entity_parser, **variations_args))
    variation_counter = Counter(
        [v for variations_ in itervalues(variations) for v in variations_])
    non_colliding_variations = {
        value: [
            v for v in variations
            if v not in all_original_values and variation_counter[v] == 1
        ]
        for value, variations in iteritems(variations)
    }

    for entry in entity[DATA]:
        entry_value = entry[VALUE]
        validated_utterances = _add_entity_variations(
            validated_utterances, non_colliding_variations, entry_value)

    # Merge utterances entities
    utterance_entities_variations = {
        ent: get_string_variations(ent, language, builtin_entity_parser,
                                   **variations_args)
        for ent in utterance_entities
    }

    for original_ent, variations in iteritems(utterance_entities_variations):
        if not original_ent or original_ent in validated_utterances:
            continue
        validated_utterances[original_ent] = original_ent
        for variation in variations:
            if variation and variation not in validated_utterances \
                    and variation not in utterance_entities:
                validated_utterances[variation] = original_ent
    formatted_entity[UTTERANCES] = validated_utterances
    return formatted_entity
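
The collision filtering above (a Counter over all generated variations, keeping only those that occur exactly once and are not themselves original values) can be shown in isolation. A minimal, self-contained sketch of that step; the helper name and the sample data are hypothetical, not part of snips-nlu:

from collections import Counter

def filter_non_colliding_variations(variations, original_values):
    """Keep only variations that map unambiguously to a single entity value.

    `variations` maps each entity value to the set of variations generated
    for it; a variation is dropped if it equals an original value or if it
    was generated for more than one entity value.
    """
    counts = Counter(v for vs in variations.values() for v in vs)
    return {
        value: [v for v in vs
                if v not in original_values and counts[v] == 1]
        for value, vs in variations.items()
    }

# Hypothetical data: "new-york" was generated for two values and is dropped
# as ambiguous, while "big apple" survives.
variations = {"new york": {"new-york", "big apple"}, "newark": {"new-york"}}
print(filter_non_colliding_variations(variations, {"new york", "newark"}))
# {'new york': ['big apple'], 'newark': []}
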
Esempio n. 46
0
 def building(self):
     # A LatentWorkerForBuilder will only be busy if it is building.
     return {
         wfb
         for wfb in itervalues(self.workerforbuilders) if wfb.isBusy()
     }
Esempio n. 47
0
  def Flush(self):
    super(OutputPluginWithOutputStreams, self).Flush()

    for stream in itervalues(self.stream_objects):
      stream.Flush()
Esempio n. 48
0
    def get(self, run, subsystem, filename=None):
        # TODO: Validate
        responseHeaders = {}
        responseHeaders["run"] = run
        responseHeaders["subsystem"] = subsystem
        responseHeaders["filenames"] = []

        # Retrieve the subsystem container for the particular run
        subsystemContainer = db["runs"]["Run{0}".format(
            run)].subsystems[subsystem]

        # Handle special cases
        if not filename:
            # Return the available files
            responseHeaders["filenames"] = [
                tempFile.filename.split("/")[-1]
                for tempFile in subsystemContainer.files.values()
            ]
            return responseForSendingFile(additionalHeaders=responseHeaders)
        elif filename == "combined":
            # Return the combined file
            responseHeaders["filenames"].append(
                os.path.join(apiParameters["dirPrefix"],
                             subsystemContainer.combinedFile.filename))
            response = responseForSendingFile(
                filename=subsystemContainer.combinedFile.filename,
                additionalHeaders=responseHeaders)
            print("response: {}".format(response))
            return response

        #filename = secure_filename(filename)

        # Look for the file
        print(next(itervalues(subsystemContainer.files)).filename)
        try:
            requestedFile = next(
                fileContainer
                for fileContainer in subsystemContainer.files.values()
                if fileContainer.filename.split("/")[-1] == filename)
        except StopIteration:
            print("Stop iteration error!")
            response = responseForSendingFile(
                additionalHeaders=responseHeaders)
            response.headers[
                "error"] = "Could not find requested file {0}".format(filename)
            response.status_code = 404
            return response

        print("filename for requested file: {}".format(
            os.path.join(apiParameters["dirPrefix"], requestedFile.filename)))
        responseHeaders["filenames"].append(
            os.path.join(apiParameters["dirPrefix"], requestedFile.filename))
        #f = open(os.path.join(apiParameters["dirPrefix"], requestedFile.filename))
        #return make_response(send_file(f, as_attachment = True, attachment_filename = filename))
        #with open(os.path.join(apiParameters["dirPrefix"], requestedFile.filename)) as f:
        with openFile(requestedFile.filename, "rb") as f:
            # If StringIO is not used here then the file will go out of scope and be closed before the
            # response is completed, which leads to "ValueError: I/O operation on closed file".
            # I cannot seem to find a way around this. However, the file cannot just be opened, as otherwise
            # it will leak memory.
            # For more, see: https://stackoverflow.com/q/13344538 (no definitive solution)
            #                https://stackoverflow.com/a/25150805 (gave me the idea to just create a new StringIO)
            # StringIO was selected based on https://stackoverflow.com/a/37463095
            return make_response(
                send_file(StringIO.StringIO(f.read()),
                          as_attachment=True,
                          attachment_filename=filename))
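
The closing comment explains why the file's bytes are copied into an in-memory buffer before being handed to send_file: the buffer survives until the response has actually been sent, while the file handle can be closed immediately. A minimal Flask sketch of the same idea, assuming Python 3 (io.BytesIO instead of StringIO.StringIO) and Flask 2.x (download_name instead of attachment_filename); the route and the directory prefix are illustrative only, and, as in the original, the filename is not sanitized here:

import io
import os

from flask import Flask, send_file

app = Flask(__name__)

@app.route("/files/<path:filename>")
def serve_file(filename):
    # Read the whole file into memory so the handle can be closed right away;
    # the BytesIO buffer stays alive until the response has been sent.
    path = os.path.join("/data/files", filename)  # illustrative prefix
    with open(path, "rb") as f:
        data = f.read()
    return send_file(io.BytesIO(data),
                     as_attachment=True,
                     download_name=filename)  # attachment_filename in Flask < 2.0
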
Esempio n. 49
0
        def thd(conn):
            workers_tbl = self.db.model.workers
            conn_tbl = self.db.model.connected_workers
            cfg_tbl = self.db.model.configured_workers
            bm_tbl = self.db.model.builder_masters

            def selectWorker(q):
                return q

            # first, get the worker itself and the configured_on info
            j = workers_tbl
            j = j.outerjoin(cfg_tbl)
            j = j.outerjoin(bm_tbl)
            q = sa.select([
                workers_tbl.c.id, workers_tbl.c.name, workers_tbl.c.info,
                bm_tbl.c.builderid, bm_tbl.c.masterid
            ],
                          from_obj=[j],
                          order_by=[workers_tbl.c.id])

            if _workerid is not None:
                q = q.where(workers_tbl.c.id == _workerid)
            if _name is not None:
                q = q.where(workers_tbl.c.name == _name)
            if masterid is not None:
                q = q.where(bm_tbl.c.masterid == masterid)
            if builderid is not None:
                q = q.where(bm_tbl.c.builderid == builderid)

            rv = {}
            res = None
            lastId = None
            cfgs = None
            for row in conn.execute(q):
                if row.id != lastId:
                    lastId = row.id
                    cfgs = []
                    res = {
                        'id': lastId,
                        'name': row.name,
                        'configured_on': cfgs,
                        'connected_to': [],
                        'workerinfo': row.info
                    }
                    rv[lastId] = res
                if row.builderid and row.masterid:
                    cfgs.append({
                        'builderid': row.builderid,
                        'masterid': row.masterid
                    })

            # now go back and get the connection info for the same set of
            # workers
            j = conn_tbl
            if _name is not None:
                # note this is not an outer join; if there are unconnected
                # workers, they were captured in rv above
                j = j.join(workers_tbl)
            q = sa.select([conn_tbl.c.workerid, conn_tbl.c.masterid],
                          from_obj=[j],
                          order_by=[conn_tbl.c.workerid])

            if _workerid is not None:
                q = q.where(conn_tbl.c.workerid == _workerid)
            if _name is not None:
                q = q.where(workers_tbl.c.name == _name)
            if masterid is not None:
                q = q.where(conn_tbl.c.masterid == masterid)

            for row in conn.execute(q):
                id = row.workerid
                if id not in rv:
                    continue
                rv[row.workerid]['connected_to'].append(row.masterid)

            return list(itervalues(rv))
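
The first loop above collapses an id-ordered join result into one dict entry per worker and finally returns list(itervalues(rv)). A small stand-alone sketch of that grouping pattern, with plain tuples standing in for SQLAlchemy rows (the row layout is assumed for illustration, not buildbot's actual schema):

def group_rows_by_id(rows):
    """Collapse (id, name, builderid, masterid) rows, ordered by id,
    into one dict per id holding the list of its configurations."""
    rv = {}
    for row_id, name, builderid, masterid in rows:
        res = rv.get(row_id)
        if res is None:
            res = {'id': row_id, 'name': name, 'configured_on': []}
            rv[row_id] = res
        if builderid is not None and masterid is not None:
            res['configured_on'].append(
                {'builderid': builderid, 'masterid': masterid})
    return list(rv.values())

rows = [(1, 'w1', 10, 100), (1, 'w1', 11, 100), (2, 'w2', None, None)]
print(group_rows_by_id(rows))
# [{'id': 1, 'name': 'w1', 'configured_on': [{'builderid': 10, 'masterid': 100},
#                                            {'builderid': 11, 'masterid': 100}]},
#  {'id': 2, 'name': 'w2', 'configured_on': []}]
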
Esempio n. 50
0
 def getConfigDict(self):
     return {
         'name': self.name,
         'childs':
         [v.getConfigDict() for v in itervalues(self.namedServices)]
     }
Esempio n. 51
0
 def _CheckAllRunning(self, action):
     solutioncompleters = itervalues(self._completer_per_solution)
     return all(
         action(completer) for completer in solutioncompleters
         if completer._ServerIsRunning())
Esempio n. 52
0
 def VerifierClassesForPlugin(cls, plugin_name):
   if not plugin_name:
     return []
   return [c for c in itervalues(cls.classes) if c.plugin_name == plugin_name]
Esempio n. 53
0
    def Flush(self):
        """Writes the changes in this object to the datastore."""

        if data_store.RelationalDBReadEnabled(category="message_handlers"):
            message_handler_requests = []
            leftover_responses = []

            for r, timestamp in self.response_queue:
                if r.request_id == 0 and r.session_id in session_id_map:
                    message_handler_requests.append(
                        rdf_objects.MessageHandlerRequest(
                            client_id=r.source and r.source.Basename(),
                            handler_name=session_id_map[r.session_id],
                            request_id=r.response_id,
                            request=r.payload))
                else:
                    leftover_responses.append((r, timestamp))

            if message_handler_requests:
                data_store.REL_DB.WriteMessageHandlerRequests(
                    message_handler_requests)
            self.response_queue = leftover_responses

        self.data_store.StoreRequestsAndResponses(
            new_requests=self.request_queue,
            new_responses=self.response_queue,
            requests_to_delete=self.requests_to_delete)

        # We need to make sure that notifications are written after the requests,
        # so we flush after writing all requests and only notify afterwards.
        mutation_pool = self.data_store.GetMutationPool()
        with mutation_pool:

            if data_store.RelationalDBReadEnabled(category="client_messages"):
                if self.client_messages_to_delete:
                    data_store.REL_DB.DeleteClientMessages(
                        list(itervalues(self.client_messages_to_delete)))
            else:
                messages_by_queue = utils.GroupBy(
                    list(itervalues(self.client_messages_to_delete)),
                    lambda request: request.queue)
                for queue, messages in iteritems(messages_by_queue):
                    self.Delete(queue, messages, mutation_pool=mutation_pool)

            if self.new_client_messages:
                for timestamp, messages in iteritems(
                        utils.GroupBy(self.new_client_messages,
                                      lambda x: x[1])):

                    self.Schedule([x[0] for x in messages],
                                  timestamp=timestamp,
                                  mutation_pool=mutation_pool)

        if self.notifications:
            for notification in itervalues(self.notifications):
                self.NotifyQueue(notification, mutation_pool=mutation_pool)

            mutation_pool.Flush()

        self.request_queue = []
        self.response_queue = []
        self.requests_to_delete = []

        self.client_messages_to_delete = {}
        self.notifications = {}
        self.new_client_messages = []
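
The else-branch above groups the messages to delete by queue before issuing one delete per queue. A minimal sketch of that grouping step, with a stand-in for utils.GroupBy (assumed here to return a dict mapping key(item) to a list of items) and hypothetical message objects:

from collections import defaultdict, namedtuple

def group_by(items, key):
    """Return a dict mapping key(item) -> list of items (stand-in for utils.GroupBy)."""
    grouped = defaultdict(list)
    for item in items:
        grouped[key(item)].append(item)
    return dict(grouped)

Message = namedtuple("Message", ["queue", "task_id"])
to_delete = {
    1: Message("C.1234/tasks", 1),
    2: Message("C.1234/tasks", 2),
    3: Message("C.5678/tasks", 3),
}

for queue, messages in group_by(list(to_delete.values()),
                                lambda m: m.queue).items():
    print(queue, [m.task_id for m in messages])
# C.1234/tasks [1, 2]
# C.5678/tasks [3]
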
Esempio n. 54
0
 def Shutdown(self):
     if self.user_options['auto_stop_csharp_server']:
         for solutioncompleter in itervalues(self._completer_per_solution):
             solutioncompleter._StopServer()
Esempio n. 55
0
    def _process_changes(self, newRev, branch):
        """
        Read changes since last change.

        - Read list of commit hashes.
        - Extract details from each commit.
        - Add changes to database.
        """

        # initial run, don't parse all history
        if not self.lastRev:
            return
        rebuild = False
        if newRev in itervalues(self.lastRev):
            if self.buildPushesWithNoCommits and \
               branch not in iterkeys(self.lastRev):
                # we know the newRev but not for this branch
                log.msg('gitpoller: rebuilding %s for new branch "%s"' %
                        (newRev, branch))
                rebuild = True

        # get the change list
        revListArgs = ([r'--format=%H', r'%s' % newRev] + [
            r'^%s' % rev.encode('ascii', 'ignore')
            for rev in itervalues(self.lastRev)
        ] + [r'--'])
        self.changeCount = 0
        results = yield self._dovccmd('log', revListArgs, path=self.workdir)

        # process oldest change first
        revList = results.split()
        revList.reverse()

        if rebuild and len(revList) == 0:
            revList = [newRev]

        self.changeCount = len(revList)
        self.lastRev[branch] = newRev

        if self.changeCount:
            log.msg(
                'gitpoller: processing %d changes: %s from "%s" branch "%s"' %
                (self.changeCount, revList, self.repourl, branch))

        for rev in revList:
            dl = defer.DeferredList([
                self._get_commit_timestamp(rev),
                self._get_commit_author(rev),
                self._get_commit_files(rev),
                self._get_commit_comments(rev),
            ],
                                    consumeErrors=True)

            results = yield dl

            # check for failures
            failures = [r[1] for r in results if not r[0]]
            if failures:
                for failure in failures:
                    log.error(
                        failure, "while processing changes for {} {}".format(
                            newRev, branch))
                # just fail on the first error; they're probably all related!
                failures[0].raiseException()

            timestamp, author, files, comments = [r[1] for r in results]

            yield self.master.data.updates.addChange(
                author=author,
                revision=ascii2unicode(rev),
                files=files,
                comments=comments,
                when_timestamp=timestamp,
                branch=ascii2unicode(self._removeHeads(branch)),
                project=self.project,
                repository=ascii2unicode(self.repourl),
                category=self.category,
                src=u'git')
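
The revListArgs list above builds the argument vector for git log: the new revision is included and every previously seen revision is negated with a ^rev prefix, so only commits reachable from newRev but not from the last poll are listed. A small sketch of just that argument construction, assuming lastRev maps branch names to commit hashes (the hashes are made up, and the Python 2 encode() call from the original is omitted):

def build_rev_list_args(new_rev, last_rev):
    """Build `git log` arguments listing commits reachable from new_rev
    but from none of the previously recorded revisions."""
    args = ['--format=%H', new_rev]
    args += ['^%s' % rev for rev in last_rev.values()]
    args.append('--')
    return args

last_rev = {'master': 'a1b2c3d', 'develop': 'e4f5a6b'}
print(build_rev_list_args('0f9e8d7', last_rev))
# ['--format=%H', '0f9e8d7', '^a1b2c3d', '^e4f5a6b', '--']
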
Esempio n. 56
0
 def check(rc):
     self.assertEqual(rc, 0)
     self.assertEqual(calls, functions)
     for repl in itervalues(repls):
         repl.assert_called_with(config)
Esempio n. 57
0
 def expectedBuildTime(self):
     if None in list(itervalues(self.times)):
         return None
     return sum(list(itervalues(self.times)))
Esempio n. 58
0
 def test_api_collection_order_desc(self):
     yield self.render_resource(self.rsrc, b'/test?order=-info')
     self.assertRestCollection(typeName='tests',
                               items=sorted(list(itervalues(endpoint.testData)),
                                            key=lambda v: v['info'], reverse=True),
                               total=8, orderSignificant=True)
Esempio n. 59
0
 def __iter__(self):
     for value in itervalues(self._backing):
         yield value
Esempio n. 60
0
 def test_api_collection(self):
     yield self.render_resource(self.rsrc, b'/test')
     self.assertRestCollection(typeName='tests',
                               items=list(itervalues(endpoint.testData)),
                               total=8)