Exemple #1
    def __enter__(self):
        """Open (or adopt) the YAML stream, parse it and return the data.

        If ``self.fn_or_stream`` is a string it is treated as a file name
        and opened with mode ``"r+"``.  When that file does not exist and
        ``self.default`` is a list or dict, a new file holding the default
        is created instead.  Any non-string ``self.fn_or_stream`` is used
        directly as the stream.

        Returns:
            The object produced by ``rtyaml.load`` (also stored on
            ``self.data``).

        Raises:
            FileNotFoundError: the named file does not exist and
                ``self.default`` is not a list or dict.
        """
        if isinstance(self.fn_or_stream, str):
            # Open the named file.
            try:
                self.stream = open(self.fn_or_stream, "r+")
            except FileNotFoundError:
                if not isinstance(self.default, (list, dict)):
                    # If there is no default and the file
                    # does not exist, re-raise the exception.
                    raise
                # Create a new file holding the default,
                # then seek back to the beginning so
                # we can read it below.
                self.stream = open(self.fn_or_stream, "w+")
                rtyaml.dump(self.default, self.stream)
                self.stream.seek(0)

            # We opened the file ourselves, so we are responsible for it.
            self.close_on_exit = True
        else:
            # Use the given stream.
            self.stream = self.fn_or_stream
        # Parse stream and return data.
        self.data = rtyaml.load(self.stream)
        return self.data
Exemple #2
def process(selection, template_file, template_path, output_path, logger):
    """Copy or filter one template file into the output tree.

    ``component.yaml`` files are copied unchanged.  Every other file is
    parsed with ``rtyaml``, reduced with ``select_controls`` and written
    back out to the rewritten location.

    Args:
        selection: passed through to ``select_controls``.
        template_file: path object of the file to process (its ``.name``
            attribute is read, so a plain string will not work).
        template_path: passed through to ``rewrite``.
        output_path: passed through to ``rewrite``.
        logger: object exposing a ``print`` method used for progress output.

    Any exception is caught and reported on stdout; nothing is re-raised.
    """
    logger.print("Checking {}".format(template_file))
    try:
        with open(template_file) as fp:
            output_file = rewrite(template_file, template_path, output_path)
            output_file_p = Path(output_file)
            if not output_file_p.parent.is_dir():
                output_file_p.parent.mkdir(parents=True, exist_ok=True)

            if template_file.name == 'component.yaml':
                logger.print("  Copying {} to {}".format(
                    template_file, output_file))
                shutil.copy(template_file, output_file)
            else:
                # NOTE(review): the `else:` was missing from this copy of
                # the code; it is restored here on the assumption that
                # only non-component files carry control dicts -- confirm.
                component = rtyaml.load(fp)
                component = select_controls(component, selection)
                controls = sorted(control['control_key']
                                  for control in component['satisfies'])
                logger.print("  Writing controls to {}".format(output_file))
                for control in controls:
                    logger.print("    {}".format(control))
                with open(output_file, "w") as out:
                    rtyaml.dump(component, out)

    except Exception as e:
        # Best-effort processing: report the failure and carry on.
        print("Exception {} processing {}".format(e, template_file))
Exemple #3
def update_sitemap(url, current_lastmod, how_we_got_here, options):
    """Updates the local cache of a sitemap file.

    Args:
        url: URL of the sitemap to process.
        current_lastmod: passed through to ``update_sitemap2``.
        how_we_got_here: list of URLs visited so far; a new list with
            ``url`` appended is passed on (the argument is not mutated).
        options: passed to ``should_skip_sitemap`` and ``update_sitemap2``.

    Returns:
        ``[]`` when ``should_skip_sitemap`` says to skip this sitemap,
        otherwise whatever ``update_sitemap2`` returns.
    """

    # Skip if the year or congress flags are set and this sitemap is
    # not for that year or Congress.
    if should_skip_sitemap(url, options):
        return []

    # For debugging, remember what URLs we are stepping through.
    how_we_got_here = how_we_got_here + [url]

    # Get the file paths to cache:
    # * the sitemap XML for future runs
    # * its <lastmod> date (which comes from the parent sitemap) so we know if we need to re-download it now
    # * the <lastmod> dates of the packages listed in this sitemap so we know if we need to re-download any package files
    cache_file = get_sitemap_cache_file(url)
    cache_file = os.path.join("govinfo/sitemap", cache_file, "sitemap.xml")
    lastmod_cache_file = cache_file.replace(".xml", "-lastmod.yaml")
    lastmod_cache_file = os.path.join(utils.cache_dir(), lastmod_cache_file)

    # Start from an empty cache on the first run; otherwise load what the
    # previous run saved.  (Previously the file was opened only when it
    # did NOT exist, which always failed.)
    if not os.path.exists(lastmod_cache_file):
        lastmod_cache = { }
    else:
        with open(lastmod_cache_file) as f:
            lastmod_cache = rtyaml.load(f)

    try:
        return update_sitemap2(url, current_lastmod, how_we_got_here, options, lastmod_cache, cache_file)
    finally:
        # Write the updated last modified dates to disk so we know the next time whether
        # we need to fetch the files. This runs even if update_sitemap2 raises, so
        # progress made before a failure is not lost.
        # NOTE(review): the original comment said an empty cache need not be
        # written, but no such check is visible; the write stays unconditional.
        with utils.NoInterrupt():
            with open(lastmod_cache_file, "w") as f:
                rtyaml.dump(lastmod_cache, f)
Exemple #4
def yaml_dump(data, path):
    """Write *data* to *path* as YAML and cache it in ``path + ".pickle"``.

    The pickle file holds ``{"hash": <sha1 hex digest of the YAML file's
    bytes>, "data": data}``.

    Args:
        data: object to serialise; must be acceptable to ``rtyaml.dump``
            and to ``pickle``.
        path: file name of the YAML file to (over)write.
    """
    # Function-scope imports, as in the original.  cPickle only exists on
    # Python 2; fall back to the standard pickle module elsewhere.
    import hashlib
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    # Context managers make sure the YAML is flushed and closed before it
    # is read back for hashing.
    with open(path, "w") as yaml_file:
        rtyaml.dump(data, yaml_file)

    # Store in a pickled file for fast access later.
    # Hash the raw bytes: hashlib rejects text strings on Python 3.
    # NOTE(review): whatever reads this cache must hash the file's bytes
    # the same way -- confirm against the loader.
    with open(path, "rb") as yaml_file:
        h = hashlib.sha1(yaml_file.read()).hexdigest()
    with open(path + ".pickle", "wb") as pickle_file:
        pickle.dump({ "hash": h, "data": data }, pickle_file)
Exemple #5
def process_xccdf_group(xccdf, xccdf_path, outdir, rule_profiles, group_path, drop_id_prefix):
	"""Write ``group.yaml`` for one XCCDF group and recurse into its subgroups.

	Args:
		xccdf: element whose direct ``Rule`` and ``Group`` children are processed.
		xccdf_path: passed through to ``process_rule`` and to recursive calls.
		outdir: root output directory.
		rule_profiles: passed through to ``process_rule`` and to recursive calls.
		group_path: list of directory names from ``outdir`` down to this group.
		drop_id_prefix: prefix stripped from child group ids when they start
			with it; may be ``None``.

	No value is returned; the result is the ``group.yaml`` file written under
	``outdir``/``group_path``.
	"""
	# Process all of the rules here.
	rules = [
		process_rule(rule, rule_profiles, xccdf_path, group_path, outdir, drop_id_prefix)
		for rule in xccdf.findall("{http://checklists.nist.gov/xccdf/1.2}Rule")
	]

	# Process all of the groups here
	# NOTE(review): nothing visible appends to `groups` and the recursive
	# call's return value is discarded, so "subgroups" is always written as
	# an empty list.  Left as-is because the intended entry is not visible.
	groups = []
	for group in xccdf.findall("{http://checklists.nist.gov/xccdf/1.2}Group"):
		# a nice directory name for the group
		g = group.get('id')
		# Raw string: same pattern, without invalid-escape warnings.
		g = re.sub(r'^xccdf_org\.(.*)\.content_group_(.*)$', r'\1_\2', g)
		if drop_id_prefix and g.startswith(drop_id_prefix):
			g = g[len(drop_id_prefix):]
			child_drop_id_prefix = drop_id_prefix
		elif "_" in g:
			child_drop_id_prefix = g.split("_")[0] + "_"
		else:
			# Without this `else:` the prefix computed above was always
			# overwritten with None.
			child_drop_id_prefix = None

		process_xccdf_group(group, xccdf_path, outdir, rule_profiles, group_path + [g], child_drop_id_prefix)

	groupdict = collections.OrderedDict([
		("id", xccdf.get("id")),
		("title", xccdf.find("{http://checklists.nist.gov/xccdf/1.2}title").text),
		("description", pandoc(xccdf.find("{http://checklists.nist.gov/xccdf/1.2}description"), 'html', 'markdown')),
		("rules", rules),
		("subgroups", groups),
	])
	fn = os.path.join(*([outdir] + group_path + ['group.yaml']))
	os.makedirs(os.path.dirname(fn), exist_ok=True)
	with open(fn, "w") as f:
		rtyaml.dump(groupdict, f)
Exemple #6
    def save_as(self, base_dir):
        """Save an OpenControl repo in a new location.

        For each certification, standard and component, and for the root
        object itself, the file at ``storage_path(base_dir)`` is opened
        for writing and a ``FILE_SIGNAL`` "write" event is sent.  Parent
        directories are created first for everything except the root.

        NOTE(review): no visible statement writes to any of the opened
        files, nothing is appended to ``root["certifications"]`` or
        ``root["standards"]``, and the last ``with`` has no indented body.
        The serialisation statements appear to be missing from this copy of
        the code; restore them from the upstream source before relying on it.
        """
        # Everything except the three collections that are handled below.
        root = self.dict(exclude={"standards", "components", "systems"})
        root["certifications"] = []
        for cert in self.certifications:
            cert_storage = cert.storage_path(base_dir)
            cert_storage.parent.mkdir(parents=True, exist_ok=True)
            with cert_storage.open("w") as cert_file:
                # NOTE(review): cert_file is never written to here.
                FILE_SIGNAL.send(self, operation="write", path=cert_storage)

        root["standards"] = []
        for std in self.standards.values():
            std_storage = std.storage_path(base_dir)
            std_storage.parent.mkdir(parents=True, exist_ok=True)
            with std_storage.open("w") as std_file:
                # NOTE(review): std_file is never written to here.
                FILE_SIGNAL.send(self, operation="write", path=std_storage)

        # storage_path() is called without base_dir here, unlike elsewhere
        # in this method.
        root["components"] = [str(c.storage_path()) for c in self.components]

        root_storage = self.storage_path(base_dir)
        with root_storage.open("w") as root_file:
            # NOTE(review): `root` is built above but never written to root_file.
            FILE_SIGNAL.send(self, operation="write", path=root_storage)

        for c in self.components:
            component_path = c.storage_path(base_dir)
            component_path.parent.mkdir(parents=True, exist_ok=True)

            # NOTE(review): the body of this `with` is missing, so the next
            # line is an IndentationError as written.
            with component_path.open("w") as component_file:
            FILE_SIGNAL.send(self, operation="write", path=component_path)
Exemple #7
def check_id_types(legislator, seen_ids, is_legislator, context):
    """Validate the ``id`` mapping of one record and note the IDs it uses.

    Args:
        legislator: dict with an ``"id"`` mapping of id-type -> value.
        seen_ids: dict mapping ``(id_type, value)`` to the list of records
            using that ID; updated in place.  Uniqueness itself is checked
            elsewhere, after all records have been visited.
        is_legislator: when true, every type in ``id_required`` must be
            present.
        context: passed as the first argument to ``error``.
    """
    for key, value in legislator["id"].items():
        # Check that the id key is one we know about.
        if key not in id_types:
            error(context, rtyaml.dump({key: value}) + " is not a valid id.")

        # Check that the data type is correct.
        elif not isinstance(value, id_types[key]):
            error(context,
                  rtyaml.dump({key: value}) + " has an invalid data type.")

        else:
            # Check that the ID isn't duplicated across legislators.
            # Since some values are lists of IDs, check the elements.
            # Just make a list of ID occurrences here -- we'll check
            # uniqueness at the end.
            if not isinstance(value, list): value = [value]
            for v in value:
                seen_ids.setdefault((key, v), []).append(legislator)

    if is_legislator:
        # Check that every legislator has ids of the required types.
        for id_type in id_required:
            if id_type not in legislator["id"]:
                error(context, "Missing %s id." % id_type)
Exemple #8
def yaml_dump(data, path):
    """Write *data* to *path* as YAML and cache it in ``path + ".pickle"``.

    The pickle file holds ``{"hash": <sha1 hex digest of the YAML file's
    bytes>, "data": data}``.

    Args:
        data: object to serialise; must be acceptable to ``rtyaml.dump``
            and to ``pickle``.
        path: file name of the YAML file to (over)write.
    """
    # Function-scope imports, as in the original.  cPickle only exists on
    # Python 2; fall back to the standard pickle module elsewhere.
    import hashlib
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    # Context managers make sure the YAML is flushed and closed before it
    # is read back for hashing.
    with open(path, "w") as yaml_file:
        rtyaml.dump(data, yaml_file)

    # Store in a pickled file for fast access later.
    # Hash the raw bytes: hashlib rejects text strings on Python 3.
    # NOTE(review): whatever reads this cache must hash the file's bytes
    # the same way -- confirm against the loader.
    with open(path, "rb") as yaml_file:
        h = hashlib.sha1(yaml_file.read()).hexdigest()
    with open(path + ".pickle", "wb") as pickle_file:
        pickle.dump({"hash": h, "data": data}, pickle_file)
def update_component_control(controlimpl):
    """Update one control narrative in the component file it came from.

    Reads ``controlimpl["source_file"]``, looks in its ``"satisfies"`` list
    for the entry matching ``controlimpl["standard"]["id"]`` and
    ``controlimpl["control"]["id"]``, and updates the matching narrative part.

    Returns:
        True once a matching narrative part was updated, False if none
        was found.

    NOTE(review): several lines of this block are truncated in this copy
    (marked below), so it does not parse as-is.
    """
    # Clean the inputs. Update controlimpl so the caller has the actual values we saved here.
    controlimpl["narrative"] = clean_text(controlimpl["narrative"])
    if controlimpl["implementation_status"]:
        # NOTE(review): the argument and closing parenthesis of this call are missing.
        controlimpl["implementation_status"] = clean_text(

    # The control is defined in the component.yaml file given in controlimpl["source_file"].
    # Open that file for editing, find the control record, update it, and return.
    with open(controlimpl["source_file"], "r+", encoding="utf8") as f:
        # Parse the content.
        data = rtyaml.load(f)

        # Look for a matching control entry.
        for control in data["satisfies"]:
            # Skip over entries that are strings -- they hold (OpenControl non-conformant) filenames.
            # NOTE(review): the body of this `if` is missing (presumably `continue`).
            if not isinstance(control, dict):

            if control["standard_key"] == controlimpl["standard"]["id"] \
              and control["control_key"] == controlimpl["control"]["id"]:

                for narrative_part in control.get("narrative", []):
                    # NOTE(review): the key passed to controlimpl.get(...) and
                    # the end of this condition are missing.
                    if narrative_part.get("key") == controlimpl.get(

                        # Found the right entry. Update the fields.

                        narrative_part["text"] = controlimpl["narrative"]

                        # Store implementation_status here. In OpenControl there is
                        # a `implementation_statuses` on the control. But our data
                        # model has a single implementation_status per control *part*.
                        # If the implementation status is cleared, remove the key.
                        if controlimpl["implementation_status"]:
                                # NOTE(review): the start and end of this
                                # assignment are missing.
                                "implementation_status"] = controlimpl[
                        elif "implementation_status" in narrative_part:
                            del narrative_part["implementation_status"]

                        # Write back out to the data files.
                        # NOTE(review): the file was opened "r+" and has just
                        # been read; no seek/truncate is visible before this
                        # dump -- confirm the file is rewound first, otherwise
                        # the new YAML is written after the old content.
                        rtyaml.dump(data, f)

                        return True

    return False
def create_system(organization_name, system_name, description, repo_path):
    """Create a new system and its repository and return path to repo on file system

    Creates the ``components``, ``standards``, ``certifications`` and
    ``outputs`` directories under ``repo_path``, opens ``opencontrol.yaml``
    and ``README.md`` there for writing, and opens ``repos.conf`` (relative
    to the current working directory) for appending.

    Returns:
        ``repo_path``, unchanged.

    NOTE(review): several statements are missing from this copy (marked
    below): no visible line writes to any of the opened files, and some
    call openers are gone, so the block does not parse as-is.
    """

    # make repo directory
    # NOTE(review): as written both messages print when the path exists and
    # nothing creates repo_path itself; an `else:` with a directory-creation
    # call appears to be missing.
    if os.path.exists(repo_path):
        print("Path {} exists".format(repo_path))
        print("Path {} created".format(repo_path))

    # get default opencontrol.yaml configuration
    cfg = get_new_config(system_name, organization_name, description)
    print("\npreparing system dir: {}".format(system_name))

    # create various directories
    # os.makedirs raises if a directory already exists (no exist_ok here).
    os.makedirs(os.path.join(repo_path, "components"))
    os.makedirs(os.path.join(repo_path, "standards"))
    os.makedirs(os.path.join(repo_path, "certifications"))
    os.makedirs(os.path.join(repo_path, "outputs"))

    # create opencontrol.yaml config file
    with open(os.path.join(repo_path, "opencontrol.yaml"), 'w') as outfile:
        # NOTE(review): `cfg` is never written to outfile in the visible code.
        print("wrote file: {}\n".format(
            os.path.join(repo_path, "opencontrol.yaml")))

    # populate reference directories from reference
    # NOTE(review): the call these two arguments belong to is missing
    # (presumably shutil.copyfile, as used further down).
        os.path.join("ref", "standards", "NIST-SP-800-53-rev4.yaml"),
        os.path.join(repo_path, "standards", "NIST-SP-800-53-rev4.yaml"))
    print("wrote file: {}\n".format(
        os.path.join(repo_path, "standards", "NIST-SP-800-53-rev4.yaml")))
    shutil.copyfile(os.path.join("ref", "standards", "opencontrol.yaml"),
                    os.path.join(repo_path, "standards", "opencontrol.yaml"))
    print("wrote file: {}\n".format(
        os.path.join(repo_path, "standards", "opencontrol.yaml")))
    # shutil.copyfile(os.path.join("ref", "standards", "hipaa-draft.yaml"), os.path.join(repo_path, cfg["standards"][0], "hipaa-draft.yaml"))
    # print("wrote file: {}\n".format(os.path.join(repo_path, cfg["standards"][0], "hipaa-draft.yaml")))
    # NOTE(review): the call these two arguments belong to is missing as well.
        os.path.join("ref", "certifications", "fisma-low-impact.yaml"),
        os.path.join(repo_path, "certifications", "fisma-low-impact.yaml"))
    print("wrote file: {}\n".format(
        os.path.join(repo_path, "certifications", "fisma-low-impact.yaml")))

    # make stub README.md file
    with open(os.path.join(repo_path, "README.md"), 'w') as outfile:
            # NOTE(review): this string is a bare expression; the write call
            # and the .format(...) that fills in "{}" are missing.
            "Machine readable representation of 800-53 control implementations for {}.\n\n# Notes\n\n"
        print("wrote file: {}\n".format(os.path.join(repo_path, "README.md")))

    # append repo path to repos.conf
    # TODO - read and clean repos.conf and then append;use clean_text function?
    with open("repos.conf", 'a') as outfile:
        # NOTE(review): nothing is written to outfile in the visible code.
        print("appended {} to file: repos.conf\n".format(repo_path))

    # Now return the path to the repository
    return repo_path
Exemple #20
def create_pledge_donation(pledge, recipients):
	# Pledge execution --- make a credit card charge and return
	# the DE donation record and other details.
	#
	# Returns the tuple (recip_contribs, fees, total_charge, don), where
	# `don` is whatever DemocracyEngineAPI.create_donation returns.
	# Raises ValueError when the line items do not add up to total_charge.
	#
	# NOTE(review): the openers/closers of several dict literals and calls
	# are missing from this copy (marked below), so it does not parse as-is.

	# Compute the amount to charge the user. We can only make whole-penny
	# contributions, so the exact amount of the charge may be less than
	# what the user pledged. recip_contribs is the line item amounts for
	# each recipient as a tuple of (recipient, action, amount).
	# NOTE(review): the loop below unpacks four fields per entry
	# (action, recipient_type, recipient, amount), not the three
	# described above -- confirm which is current.
	recip_contribs, fees, total_charge = compute_charge(pledge, recipients)

	# Prepare line items for the API.
	line_items = []

	# Create the line item for fees.
	# NOTE(review): the statement that wraps these two entries (presumably
	# an append of a dict to line_items) is missing.
		"recipient_id": DemocracyEngineAPI.fees_recipient_id,
		"amount": DemocracyEngineAPI.format_decimal(fees),

	# Create the line items for campaign recipients.
	for action, recipient_type, recipient, amount in recip_contribs:
		# NOTE(review): same here -- the enclosing statement is missing.
			"recipient_id": recipient.de_id,
			"amount": DemocracyEngineAPI.format_decimal(amount),

	# Prepare the donation record for authorization & capture.
	de_don_req = create_de_donation_basic_dict(pledge)
	# NOTE(review): the statement that merges the following keys into
	# de_don_req is missing, as are its closing brackets.
		# billing info
		"token": pledge.profile.extra['billing']['de_cc_token'],

		# line items
		"line_items": line_items,

		# reported to the recipient
		"source_code": "",
		"ref_code": "",

		# tracking info for internal use
		"aux_data": rtyaml.dump({ # DE will gives this back to us encoded as YAML, but the dict encoding is ruby-ish so to be sure we can parse it, we'll encode it first
			"trigger": pledge.trigger.id,
			"campaign": pledge.via_campaign.id,
			"pledge": pledge.id,
			"user": pledge.user.id if pledge.user else None,
			"email": pledge.get_email(),
			"pledge_created": pledge.created,

	# Sanity check the total.
	# Amounts are formatted strings; the "$" is stripped before parsing
	# them back into Decimal for the comparison.
	if sum(decimal.Decimal(li['amount'].replace("$", "")) for li in de_don_req['line_items']) \
		!= total_charge:
		raise ValueError("Sum of line items does not match total charge.")
	# Create the 'donation', which creates a transaction and performs cc authorization.
	don = DemocracyEngineAPI.create_donation(de_don_req)

	# Return.
	return (recip_contribs, fees, total_charge, don)
def create_component(project, component_path, component_name):
    """Create a new OpenControl component inside a project.

    Args:
        project: dict; ``project["path"]`` is the project directory and
            ``project["id"]`` is used in the error message.
        component_path: path of the new component relative to the project.
        component_name: stored as the component's ``name``.

    Returns:
        The entry from ``load_project_components(project)`` whose ``"path"``
        equals the new component's directory.

    Raises:
        ValueError: no such entry is found after creation.

    NOTE(review): several statements are missing from this copy (marked
    below), so the block does not parse as-is.
    """
    # Create a new OpenControl component.

    # Create the stub data structure.
    component_opencontrol = OrderedDict()
    component_opencontrol['schema_version'] = '3.0.0'
    component_opencontrol['name'] = component_name

    # Create the path.
    # os.makedirs raises if the directory already exists (no exist_ok here).
    os.makedirs(os.path.join(project['path'], component_path))

    # Write the component.yaml file.
    # NOTE(review): no mode argument is visible, so this open() defaults to
    # read mode, and the `with` body that writes component_opencontrol is missing.
    with open(os.path.join(project['path'], component_path, 'component.yaml'),
              encoding="utf8") as f:

    # Add the path to the project's opencontrol.yaml file.
    # NOTE(review): no mode argument is visible here either, yet the block
    # dumps back into `f` below, which needs a writable mode.
    with open(os.path.join(project["path"], 'opencontrol.yaml'),
              encoding="utf8") as f:
        # Parse the content.
        data = rtyaml.load(f)

        # Create the "components" array if it does not exist.
        if not isinstance(data.get("components"), list):
            data["components"] = []

        # Append the new component path.
        # NOTE(review): the append statement itself is missing.

        # Write back out to the data files.
        # NOTE(review): no seek/truncate is visible before this dump --
        # confirm the file is rewound first.
        rtyaml.dump(data, f)

    # Read the component back and return it.
    for component in load_project_components(project):
        if component["path"] == os.path.join(project['path'], component_path):
            return component

    raise ValueError(
        "Component {} does not exist in project {} even after creating it.".
        format(component_path, project["id"]))
def check_bio(bio, is_current_legislator, context):
  # Validate a "bio" mapping: every key must be listed in bio_keys and
  # every value must be a string.  Problems are reported through error().
  for field, field_value in bio.items():
    if field not in bio_keys:
      error(context, "%s is not a valid key in bio." % field)
      continue
    if not isinstance(field_value, str):
      error(context, rtyaml.dump({ field: field_value }) + " has an invalid data type.")
  if is_current_legislator:
    # Only current legislators must carry every bio key; the information
    # is not always available for historical members of Congress or
    # presidents.
    absent = (required for required in bio_keys if required not in bio)
    for required in absent:
      error(context, "Missing bio->{}.".format(required))
  # NOTE(review): the `def` line of the function this body belongs to is
  # missing from this copy.  `fn`, `seen_ids` and `current` are not defined
  # in the visible code and were presumably its parameters.  Calls below
  # pass error(), check_id_types() and check_date() fewer arguments than
  # the definitions/uses elsewhere in this file (which take a `context`),
  # so this body looks like an older revision -- confirm before reuse.
  # Open and iterate over the entries.
  with open(fn) as f:
    legislators = rtyaml.load(f)
  for legislator in legislators:
    # Check the IDs.
    if "id" not in legislator:
      error(repr(legislator) + " is missing 'id'.")
      # NOTE(review): an `else:` appears to be missing here; as written the
      # ID check runs only when 'id' is absent.
      # Check that the IDs are valid.
      check_id_types(legislator, seen_ids, True)

    # Check the name.
    if "name" not in legislator:
      error(repr(legislator) + " is missing 'name'.")
    for name in legislator.get("other_names", []):
      check_name(name, is_other_names=True)

    # Check the biographical fields.
    if "bio" not in legislator:
      error(repr(legislator) + " is missing 'bio'.")

    # Check the terms.
    if "terms" not in legislator:
      error(repr(legislator) + " is missing 'terms'.")
    elif not isinstance(legislator["terms"], list):
      error(repr(legislator) + " terms has an invalid data type.")
    elif len(legislator["terms"]) == 0:
      error(repr(legislator) + " terms is empty.")
      # NOTE(review): an `else:` appears to be missing here; as written the
      # loop below sits in the "terms is empty" branch.
      prev_term = None
      for i, term in enumerate(legislator["terms"]):
        # Only the last term in the list is treated as current.
        # NOTE(review): the rest of this call's arguments and its closing
        # parenthesis are missing.
        check_term(term, prev_term,
          current=(current and i==len(legislator["terms"])-1),
        prev_term = term

    # Check the leadership roles.
    for role in legislator.get("leadership_roles", []):
      # All of these fields must be strings.
      for key, value in role.items():
        if not isinstance(value, str):
          error(rtyaml.dump({ key: value }) + " has an invalid data type.")

      # Check required fields.
      if "title" not in role:
        error(rtyaml.dump(role) + " is missing title.")
      if role.get("chamber") not in ("house", "senate"):
        error(rtyaml.dump(role) + " has an invalid chamber.")
      if "start" not in role:
        error(rtyaml.dump(role) + " is missing start.")
      if "end" not in role and not current:
        # end is required only in the historical file
        error(rtyaml.dump(role) + " is missing end.")

      # Check dates.
      # role['start'] is indexed directly, so a role without "start" raises
      # KeyError here even though the missing key was reported above.
      start = check_date(role['start'])
      if "end" in role:
        end = check_date(role['end'])
        if start and end and end < start:
          error(rtyaml.dump(role) + " has end before start.")
def check_term(term, prev_term, context, current=None, current_mocs=None):
  """Validate one term record, reporting problems through ``error``.

  Args:
    term: dict describing the term.
    prev_term: the preceding term dict of the same person, or a false
      value when there is none.
    context: passed as the first argument to ``error``; the term's date
      range is appended to it with ``+=`` once both dates are valid.
    current: true when this is the most recent term in the current file;
      enables the uniqueness, party, caucus and website checks.
    current_mocs: collection of offices already seen among current
      members.  Must be supplied when ``current`` is true; it is updated
      in place (assumed to be a set -- ``.add`` is called on it).
  """
  # Check type.
  if term.get("type") not in ("rep", "sen"):
    error(context, "Term has invalid 'type'.")

  # Check date range.
  start = check_date(term.get('start'), context)
  end = check_date(term.get('end'), context)
  if start and end:
    context += "({} to {})".format(start, end)

    if end < start:
      error(context, "Term has end before start.")

    # TODO: Remove 'and end > date(2000,1,1)'. I'm just adding it because
    # lots of historical data fails this test.
    if prev_term and end > date(2000,1,1):
      prev_end = check_date(prev_term.get("end"), context)
      if prev_end:
        if start < prev_end:
          error(context, "Term has start before previous term's end.")

    if not current and (end > now):
      error(context, "Term has an end date in the future but is a past term.")
    if current and (end < now):
      error(context, "Term has an end date in the past but is a most recent term in the current file.")

  # Check how.
  if term.get("how") not in (None, "appointment",):
    error(context, "Term has invalid 'how'.")

  # Check end-type.
  if term.get("end-type") not in (None, "special-election",):
    error(context, "Term has invalid 'end-type'.")
  if term.get("end-type") == "special-election" and term.get("how") != "appointment":
    error(context, "Term can't have an 'end-type' without being an appointed senator.")

  # Check state, district, class, state_rank.
  if term.get("state") not in utils.states:
    error(context, "Term has invalid state.")
  if term.get("type") == "rep":
    if not isinstance(term.get("district"), int):
      error(context, "Term has invalid district.")
  if term.get("type") == "sen":
    if term.get("class") not in (1, 2, 3):
      error(context, "Term has invalid class.")
    if term.get("state_rank") not in ("junior", "senior", None):
      error(context, "Term has invalid senator state_rank.")
    elif current and term.get("state_rank") is None:
      error(context, "Term is missing senator state_rank.")

  if current:
    # Check uniqueness of office for current members.

    # Check office.
    office = (term.get("type"), term.get("state"), term.get("district") if term.get("type") == "rep" else term.get("class"))
    if office in current_mocs:
      error(context, "Term duplicates an office.")
    # Record the office so a later term holding the same one is caught;
    # without this the duplicate check above could never fire.
    current_mocs.add(office)

    # Check senator rank isn't duplicated.
    if term.get("type") == "sen":
      office = (term.get("state"), term.get("state_rank"))
      if office in current_mocs:
        error(context, "Term duplicates state_rank in a state.")
      # Same bookkeeping for the (state, rank) pair.
      current_mocs.add(office)

    # Check party of current members (historical is too difficult).
    if term.get("party") not in ("Republican", "Democrat", "Independent"):
      error(context, rtyaml.dump({ "party": term.get("party") }) + " is invalid.")

    # Check caucus of Independent members.
    if term.get("party") == "Independent" and term.get("caucus") not in ("Republican", "Democrat"):
      error(context, rtyaml.dump({ "caucus": term.get("caucus") }) + " is invalid when party is Independent.")

    # Check website -- it's optional, so warn.
    if not term.get("url"):
      print(context, "Term is missing a website url.")
Exemple #47
