Ejemplo n.º 1
0
def bootstrap(f_produce, partition_count, schema, database, table, config):
    """Emit the complete bootstrap message sequence for one table.

    Sends, in order and all on the same partition: the start message, one
    message per item yielded by ``bootstrap_insert_messages``, and the
    complete message. Progress is shown through ``display_progress`` no
    more often than every ``DISPLAY_PROGRESS_PERIOD_MILLIS`` milliseconds.

    Args:
        f_produce: Passed through unchanged as the first argument of every
            ``produce`` call.
        partition_count: Modulus used to derive the partition from the
            hash of ``database``.
        schema: Key into ``config['mysql']['schemas']``.
        database: Key below the table entry in ``config``; also the string
            that is hashed to choose the partition.
        table: Key into the schema's ``'tables'`` mapping.
        config: Nested mapping; the entry at
            ``['mysql']['schemas'][schema]['tables'][table][database]``
            must provide ``'size'`` and ``'topic'``.
    """
    began_ms = time() * 1000.0

    entry = config['mysql']['schemas'][schema]['tables'][table][database]
    # The size is parsed through float() first and then truncated to int.
    row_total = int(float(entry['size']))
    topic = entry['topic']

    # NOTE(review): with a positive partition_count Python's % is already
    # non-negative, so abs() changes nothing here. If this is meant to
    # reproduce Java's Math.abs(hash % n), the two differ for negative
    # hashes -- confirm which behavior is intended.
    partition = abs(java_string_hashcode(database) % partition_count)

    start_args = bootstrap_start_message(topic, schema, database, table,
                                         config)
    produce(f_produce, partition, *start_args)

    rows_sent = 0
    last_report = time()
    for _, key, value in bootstrap_insert_messages(topic, schema, database,
                                                   table, config, row_total):
        produce(f_produce, partition, topic, key, value)
        rows_sent += 1

        period_seconds = DISPLAY_PROGRESS_PERIOD_MILLIS / 1000.0
        if not (time() - last_report > period_seconds):
            continue
        display_progress(row_total, rows_sent, began_ms)
        last_report = time()

    complete_args = bootstrap_complete_message(topic, schema, database,
                                               table, config)
    produce(f_produce, partition, *complete_args)
    # Emit an empty line once the bootstrap is done.
    display_line("")
Ejemplo n.º 2
0
 def consume(key, value):
     """Print one message as a single compact JSON line on stdout.

     The printed object has three fields: ``partition`` (derived from the
     hash of ``key['database']`` modulo ``partition_count``), ``key`` and
     ``message`` (the given ``value``).

     Args:
         key: Mapping that must contain a ``'database'`` entry; it is also
             embedded in the output as-is.
         value: JSON-serializable payload, emitted under ``"message"``.

     Raises:
         KeyError: If ``key`` has no ``'database'`` entry.
     """
     partition = abs(java_string_hashcode(key['database']) % partition_count)
     output = {
         "partition": partition,
         "key": key,
         "message": value
     }
     # Call form of print: with a single parenthesized argument this prints
     # the same text on Python 2 and is valid syntax on Python 3, where the
     # old ``print expr`` statement is a SyntaxError.
     print(json.dumps(output, separators=(',', ':')))
Ejemplo n.º 3
0
 def consume(key, value):
     """Send one key/value pair through ``kafka_producer`` as compact JSON.

     Both ``key`` and ``value`` are serialized with ``json.dumps`` using
     separators without whitespace. The target partition is derived from
     the hash of ``key['database']`` modulo ``partition_count``.

     Args:
         key: Mapping that must contain a ``'database'`` entry.
         value: JSON-serializable payload.

     Raises:
         KeyError: If ``key`` has no ``'database'`` entry.
     """
     db_name = key['database']

     compact = (',', ':')
     encoded_key = json.dumps(key, separators=compact)
     encoded_value = json.dumps(value, separators=compact)

     target = abs(java_string_hashcode(db_name) % partition_count)
     kafka_producer.send(
         topic, key=encoded_key, value=encoded_value, partition=target)
Ejemplo n.º 4
0
def bootstrap(f_produce, partition_count, schema, database, table, config):
    """Emit the complete bootstrap message sequence for one table.

    Sends, in order and all to ``config['kafka']['topic']`` on the same
    partition: the start message, one message per ``(key, value)`` pair
    yielded by ``bootstrap_insert_messages``, and the complete message.
    Progress is shown through ``display_progress`` no more often than
    every ``DISPLAY_PROGRESS_PERIOD_MILLIS`` milliseconds.

    Args:
        f_produce: Passed through unchanged as the first argument of every
            ``produce`` call.
        partition_count: Modulus used to derive the partition from the
            hash of ``database``.
        schema: Key into ``config['mysql']['schemas']``.
        database: Key below the table entry in ``config``; also the string
            that is hashed to choose the partition.
        table: Key into the schema's ``'tables'`` mapping.
        config: Nested mapping providing ``['kafka']['topic']`` and
            ``['mysql']['schemas'][schema]['tables'][table][database]['size']``.
    """
    topic = config['kafka']['topic']
    began_ms = time() * 1000.0

    tables = config['mysql']['schemas'][schema]['tables']
    # The size is parsed through float() first and then truncated to int.
    row_total = int(float(tables[table][database]['size']))

    # NOTE(review): with a positive partition_count Python's % is already
    # non-negative, so abs() changes nothing here -- confirm whether
    # Java-style Math.abs(hash % n) semantics were intended.
    partition = abs(java_string_hashcode(database) % partition_count)

    start_args = bootstrap_start_message(schema, database, table, config)
    produce(f_produce, topic, partition, *start_args)

    rows_sent = 0
    last_report = time()
    for key, value in bootstrap_insert_messages(schema, database, table,
                                                config, row_total):
        produce(f_produce, topic, partition, key, value)
        rows_sent += 1

        period_seconds = DISPLAY_PROGRESS_PERIOD_MILLIS / 1000.0
        if not (time() - last_report > period_seconds):
            continue
        display_progress(row_total, rows_sent, began_ms)
        last_report = time()

    complete_args = bootstrap_complete_message(schema, database, table,
                                               config)
    produce(f_produce, topic, partition, *complete_args)
    # Emit an empty line once the bootstrap is done.
    display_line("")
Ejemplo n.º 5
0
def main() -> None:
    """Print a config that maps every student to a repository name.

    Reads the base config template (JSON) and the student name file given
    on the command line, derives one repository name per group via
    ``group2repo``, stores the student-to-repository mapping under
    ``"repository_map"`` in the config, and prints the result as indented
    JSON with sorted keys.

    The student file holds one group per line, with names separated by
    commas. Blank lines are skipped so they do not create a group holding
    a single empty student name.

    Side effects:
        Reseeds the global ``random`` generator with the hash of the
        config's ``"assignment_name"`` so the shuffle is repeatable for a
        given assignment.
    """
    parser = argparse.ArgumentParser(
        description="prints the contents of a config file to standard out.")
    parser.add_argument("sfile", type=str, help="student name file")
    parser.add_argument("base_config", type=str, help="base config template")

    args = parser.parse_args()
    with open(args.base_config, 'r') as f:
        conf = json.load(f)

    # seed RNG based on assignment name
    seed = java_string_hashcode(conf["assignment_name"])
    random.seed(seed)

    # read student names from input file; the context manager closes the
    # file even if reading fails part-way through
    groups: List[List[str]] = []
    with open(args.sfile) as student_file:
        for line in student_file:
            names = line.rstrip()
            if not names:
                # blank line: no students, so no group
                continue
            groups.append(names.split(","))

    # "randomly" shuffle group list
    random.shuffle(groups)

    # pair students with repositories
    repo_map: Dict[str, str] = {}
    for group in groups:
        # synthesize repo name
        repo = group2repo(conf["course"], conf["assignment_name"], group)
        for student in group:
            # add pairing to json
            repo_map[student] = repo

    # print config
    conf["repository_map"] = repo_map

    print(json.dumps(conf, indent=4, sort_keys=True))
Ejemplo n.º 6
0
 def consume(key, value):
     """Print one message as a single compact JSON line on stdout.

     The printed object has three fields: ``partition`` (derived from the
     hash of ``key['database']`` modulo ``partition_count``), ``key`` and
     ``message`` (the given ``value``).

     Args:
         key: Mapping that must contain a ``'database'`` entry; it is also
             embedded in the output as-is.
         value: JSON-serializable payload, emitted under ``"message"``.

     Raises:
         KeyError: If ``key`` has no ``'database'`` entry.
     """
     partition = abs(
         java_string_hashcode(key['database']) % partition_count)
     output = {"partition": partition, "key": key, "message": value}
     # Call form of print: with a single parenthesized argument this prints
     # the same text on Python 2 and is valid syntax on Python 3, where the
     # old ``print expr`` statement is a SyntaxError.
     print(json.dumps(output, separators=(',', ':')))
Ejemplo n.º 7
0
    def __init__(self, json_conf_file: str, verbosity: bool):
        """Load the JSON config file and populate all fields from it.

        Required config keys: ``hostname``, ``course``, ``assignment_name``,
        ``starter_repo``, ``github_org``, ``archive_path``,
        ``submission_path``, ``ta_path``, ``feedback_branch``,
        ``rsync_excludes``, ``repository_map`` and ``TAs``.
        Optional keys: ``default_branch`` (defaults to ``"main"``),
        ``anonymize_sub_path`` (defaults to ``True``) and
        ``do_not_accept_changes_after_due_date_timestamp`` (``due_date`` is
        only set when this key is present).

        Side effects: sorts the config's ``TAs`` list in place (the same
        list remains reachable through ``jsondict``) and reseeds the global
        ``random`` generator with the hash of the assignment name.

        Args:
            json_conf_file: Path of the JSON configuration file to read.
            verbosity: Stored as the ``verbose`` flag.

        Raises:
            KeyError: If a required config key is missing.
        """

        # read and parse the config file
        with open(json_conf_file, 'r') as f:
            conf = json.load(f)
            self.jsondict = conf
            "The parsed configuration file as a dictionary."

        # declare/init fields

        self.verbose: bool = verbosity
        "Flag that enables verbose output."

        self.hostname: str = conf["hostname"]
        """
        The name of your SSH `config` host to use for script
        interaction. This allows you to use a different GitHub identity for
        managing course scripts because, presently, PyGithub does not support
        two-factor authentication.
        """

        self.user2repo: Dict[str, str] = {}
        "Maps each user to their repository name."

        self.repo2group: Dict[str, List[str]] = {}
        """Maps each repo name to its list of students (assuming a group
        assignment). The list has length 1 for an individual assignment.
        """

        self.ta_assignments: Dict[str, str] = {}
        "Maps each repository to the grading TAs."

        self.course: str = conf["course"]
        "The name of the course."

        self.assignment_name: str = conf["assignment_name"]
        "The name of the assignment."

        self.starter_repo: str = conf["starter_repo"]
        """
        Path to starter repo.  Starter code is distributed by setting each
        student repository as a "remote" for the starter repository and then
        `push`ing.  Student repositories _must_ be empty (i.e., no `main`
        branch) otherwise `push` will fail.
        """

        self.github_org: str = conf["github_org"]
        "Name of the GitHub organization to use."

        self.archive_path: str = conf["archive_path"]
        """Path to the folder intended as the deanonymized repository of
        student submissions for Academic Honor Code cases."""

        self.submission_path: str = conf["submission_path"]
        """Path to the faculty-only staging area for squashing and modifying
        TA feedback before issuing pull requests."""

        self.ta_path: str = conf["ta_path"]
        """Path to the TA staging area where anonymized student submissions
        are copied."""

        self.feedback_branch: str = conf["feedback_branch"]
        """Branch to commit TA/instructor feedback on. Pull requests are
        issued from this branch."""

        self.default_branch: str = conf.get("default_branch", "main")
        "Branch that student commits to. Defaults to `main` if not specified."

        if "do_not_accept_changes_after_due_date_timestamp" in conf:
            # TODO: type? best guess is int
            self.due_date = \
                conf["do_not_accept_changes_after_due_date_timestamp"]
            "A UNIX timestamp representing the due date in the local timezone."

        self.anonymize_sub_path: bool = conf.get("anonymize_sub_path", True)
        """Whether the contents of the `submissions` folder, which is viewable
        only by faculty (not TAs), are anonymized."""

        self.rsync_excludes: List[str] = conf["rsync_excludes"]
        """List of files & directories to be excluded from rsync when copying
        to the TA folder."""

        # populate mappings (user2repo, repo2group)
        for student, repo_name in conf["repository_map"].items():
            self.add_mapping(student, repo_name)

        # read TA list; sorting in place makes the TA order deterministic
        ta_names: List[str] = conf["TAs"]
        ta_names.sort()

        # generate TA map; seeding with the assignment name makes the
        # shuffle repeatable for a given assignment
        random.seed(java_string_hashcode(conf["assignment_name"]))
        # NOTE(review): this shuffles whatever list `self.repositories`
        # returns, in place -- confirm it is not shared internal state.
        shuffled_repos = self.repositories
        random.shuffle(shuffled_repos)
        self.ta_assignments = round_robin_map(ta_names, shuffled_repos)
Ejemplo n.º 8
0
 def consume(key, value):
     """Send one key/value pair through ``kafka_producer`` as compact JSON.

     ``key`` and ``value`` are each serialized with ``json.dumps`` using
     separators without whitespace, and sent to ``topic`` on the partition
     derived from the hash of ``key['database']`` modulo
     ``partition_count``.

     Args:
         key: Mapping that must contain a ``'database'`` entry.
         value: JSON-serializable payload.

     Raises:
         KeyError: If ``key`` has no ``'database'`` entry.
     """
     database = key['database']
     # Serialize first, then pick the partition, then send.
     send_kwargs = {
         'key': json.dumps(key, separators=(',', ':')),
         'value': json.dumps(value, separators=(',', ':')),
     }
     send_kwargs['partition'] = abs(
         java_string_hashcode(database) % partition_count)
     kafka_producer.send(topic, **send_kwargs)