Example #1
    def generate_data(self, session, engine, base, num_records,
                      current_batch_num):
        output_stream = SqlOutputStream(engine, self.mappings)
        old_continuation_file = self.get_old_continuation_file()
        if old_continuation_file:
            # reopen to ensure file pointer is at starting point
            old_continuation_file = open(old_continuation_file, "r")
        new_continuation_file = self.open_new_continuation_file()

        with open(self.yaml_file) as open_yaml_file:
            summary = generate(
                open_yaml_file=open_yaml_file,
                user_options=self.vars,
                output_stream=output_stream,
                stopping_criteria=self.stopping_criteria,
                continuation_file=old_continuation_file,
                generate_continuation_file=new_continuation_file,
            )
            output_stream.close()  # flush buffered rows to the database

            # save the continuation state so a later batch can resume from it
            if (new_continuation_file
                    and Path(new_continuation_file.name).exists()
                    and self.working_directory):
                shutil.copyfile(new_continuation_file.name,
                                self.default_continuation_file_path())

            if self.generate_mapping_file:
                with open(self.generate_mapping_file, "w+") as f:
                    yaml.safe_dump(mapping_from_factory_templates(summary),
                                   f,
                                   sort_keys=False)
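
For orientation, the same generate()/SqlOutputStream handshake can be sketched outside the task class. The import paths, file names, and the None mappings argument below are assumptions, not part of the example above; the generate() keyword arguments mirror it.

from sqlalchemy import create_engine
# assumed import locations; adjust to the snowfakery version in use
from snowfakery.data_generator import generate
from snowfakery.output_streams import SqlOutputStream

engine = create_engine("sqlite:///generated.db")  # illustrative target database
output_stream = SqlOutputStream(engine, None)  # None: no extra mappings
with open("recipe.yml") as open_yaml_file:  # hypothetical recipe file
    summary = generate(
        open_yaml_file=open_yaml_file,
        user_options={},
        output_stream=output_stream,
    )
output_stream.close()  # flush buffered rows before using the database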
Example #2
    def generate_data(self, db_url, num_records, current_batch_num):
        output_stream = SqlOutputStream.from_url(db_url, self.mapping)
        old_continuation_file = self.get_old_continuation_file()
        if old_continuation_file:
            # reopen to ensure file pointer is at starting point
            old_continuation_file = open(old_continuation_file, "r")
        with self.open_new_continuation_file() as new_continuation_file:
            try:
                with open(self.yaml_file) as open_yaml_file:
                    summary = generate(
                        open_yaml_file=open_yaml_file,
                        user_options=self.vars,
                        output_stream=output_stream,
                        stopping_criteria=self.stopping_criteria,
                        continuation_file=old_continuation_file,
                        generate_continuation_file=new_continuation_file,
                    )
            finally:
                output_stream.close()  # close the stream even if generation raised

            if (new_continuation_file
                    and Path(new_continuation_file.name).exists()
                    and self.working_directory):
                shutil.copyfile(new_continuation_file.name,
                                self.default_continuation_file_path())

        # derive a mapping from the recipe templates, then post-process it
        mapping = snowfakery.generate_mapping_from_recipe.mapping_from_recipe_templates(
            summary)
        self.postProcessMapping(mapping)

        if self.generate_mapping_file:
            with open(self.generate_mapping_file, "w+") as f:
                yaml.safe_dump(mapping, f, sort_keys=False)
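
Unlike Example #1, this version closes the stream in a finally block, so buffered rows are flushed even when generation raises. The pattern in isolation, with illustrative names:

output_stream = SqlOutputStream.from_url("sqlite:///generated.db", None)
try:
    with open("recipe.yml") as open_yaml_file:  # hypothetical recipe file
        summary = generate(
            open_yaml_file=open_yaml_file,
            user_options={},
            output_stream=output_stream,
        )
finally:
    output_stream.close()  # runs on success and on error alike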
Example #3
@contextmanager  # implied by the yield/finally below; defined in contextlib
def configure_output_stream(dburls, mapping_file, output_format, output_files,
                            output_folder):
    assert isinstance(output_files, (list, type(None)))
    output_streams = []  # we allow multiple output streams

    for dburl in dburls:
        if mapping_file:
            with click.open_file(mapping_file, "r") as f:
                mappings = yaml.safe_load(f)
        else:
            mappings = None

        output_streams.append(SqlOutputStream.from_url(dburl, mappings))

    # JSON is the only output format (other than debug) that can go on stdout
    if output_format == "json" and not output_files:
        output_streams.append(JSONOutputStream(sys.stdout))

    if output_format == "csv":
        output_streams.append(CSVOutputStream(output_folder))

    if output_files:
        for path in output_files:
            if output_folder:
                path = Path(output_folder,
                            path)  # put the file in the output folder
            # use the explicit format if given, else infer it from the file extension
            format = output_format or Path(path).suffix[1:]

            if format == "json":
                output_streams.append(JSONOutputStream(path))
            elif format == "txt":
                output_streams.append(DebugOutputStream(path))
            elif format == "dot":
                output_streams.append(GraphvizOutputStream(path))
            elif format in graphic_file_extensions:
                output_streams.append(ImageOutputStream(path, format))
            else:
                raise click.ClickException(
                    f"Unknown format or file extension: {format}")

    # collapse to a single stream: debug by default, the sole stream, or a multiplexer
    if len(output_streams) == 0:
        output_stream = DebugOutputStream()
    elif len(output_streams) == 1:
        output_stream = output_streams[0]
    else:
        output_stream = MultiplexOutputStream(output_streams)
    try:
        yield output_stream
    finally:
        for output_stream in output_streams:
            try:
                messages = output_stream.close()
            except Exception as e:
                messages = None
                click.echo(f"Could not close {output_stream}: {str(e)}",
                           err=True)
            if messages:
                for message in messages:
                    click.echo(message)
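
Because configure_output_stream yields exactly once and tears down in finally, it is meant to be entered as a context manager (hence the @contextmanager decorator above). A hypothetical caller, with illustrative argument values:

with configure_output_stream(
    dburls=[],  # no database targets this run
    mapping_file=None,
    output_format="json",
    output_files=["people.json"],  # routed to a JSONOutputStream
    output_folder="out",
) as output_stream:
    ...  # hand output_stream to generate(...)
# on exit, every underlying stream is closed and its messages are echoed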
Example #4
    def do_output(self, yaml):
        # generate into a throwaway SQLite database, then read every table back
        with named_temporary_file_path() as f:
            url = f"sqlite:///{f}"
            output_stream = SqlOutputStream.from_url(url, None)
            results = generate(StringIO(yaml), {}, output_stream)
            table_names = results.tables.keys()
            output_stream.close()  # flush rows before querying
            engine = create_engine(url)
            with engine.connect() as connection:
                tables = {
                    table_name: list(connection.execute(f"select * from {table_name}"))
                    for table_name in table_names
                }
                return tables
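
A usage sketch for do_output, assuming an instance of the enclosing test class is at hand; the recipe uses standard Snowfakery syntax and the expected rows are illustrative:

recipe = """
- object: Person
  fields:
    name: Grace
"""
tables = test_case.do_output(recipe)  # test_case: hypothetical instance of the class above
# e.g. tables == {"Person": [(1, "Grace")]}, one row per generated object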