Esempio n. 1
0
def parallelization(slides: Slides):
    """Append the slides about concurrency primitives and parallel frameworks.

    Three slides are created on ``slides``: a bullet list titled
    "Concurrency primitives", a "Shared-memory parallelism" slide holding two
    Rust snippets, and a "Message-passing" slide holding an MPI snippet.
    """
    par_iter_snippet = """
fn sum_of_squares(input: &[i32]) -> i32 {
    input.par_iter()
         .map(|&i| i * i)
         .sum()
}"""
    parallel_for_snippet = """
#[parallel]
for x in 0..10 {
    println!("{}", x);
}"""
    mpi_snippet = """
let universe = mpi::initialize();
let world = universe.world();
let size = world.size();
let rank = world.rank();

if rank == 0 {
    let (msg, status) = world.any_process().receive_vec();
}
"""
    snippet_width = 800

    # --- Slide: concurrency primitives -----------------------------------
    primitives_slide = slides.new_slide()
    primitives = slide_header(primitives_slide,
                              "Concurrency primitives").box()
    list_item(primitives).text("Mutexes")
    for label in ("Condition variables", "Atomics", "Synchronized queues"):
        list_item(primitives, show="next+").text(label)

    # --- Slide: shared-memory parallelism --------------------------------
    shared_slide = slides.new_slide()
    shared_slide.update_style("code", s(size=30))
    shared = slide_header(shared_slide, "Shared-memory parallelism")
    shared.box().text("No OpenMP ☹")

    shared.box(height=40)  # empty fixed-height box between the two parts
    rayon_column = shared.box(show="2+")

    # Horizontal row: text, small gap, image, closing parenthesis.
    title_row = rayon_column.box(horizontal=True)
    title_row.box().text("Rayon (+ Rayon adaptive")
    title_row.box(width=20)
    title_row.box(width=60).image("imgs/saurabh.png")
    title_row.box().text(" )")
    rayon_column.box(height=30)

    par_iter_box = code(rayon_column.box(), par_iter_snippet,
                        width=snippet_width)
    highlight = par_iter_box.line_box(1, z_level=99, x=165, width=165,
                                      show="3+")
    highlight.rect(bg_color=CODE_HIGHLIGHT_COLOR)

    code(rayon_column.box(show="4+"), parallel_for_snippet,
         width=snippet_width)

    # --- Slide: message passing ------------------------------------------
    mpi_slide = slides.new_slide()
    mpi_slide.update_style("code", s(size=26))
    code(slide_header(mpi_slide, "Message-passing"), mpi_snippet)
Esempio n. 2
0
def rust_safety(slides: Slides):
    """Append the slides motivating Rust's safety story.

    Creates, in order: an untitled opener ("Rust is safe..."), then slides
    titled "Undefined behaviour", "UB in Java", "UB in Python" and
    "Sources of UB".
    """
    java_quote = """“The behavior of an iterator is ~emph{unspecified} if the underlying
collection is modified while the iteration is in progress in any way
other than by calling this method, unless an overriding class has specified
a concurrent modification policy.”
"""
    python_quote = """“There is a subtlety when the sequence is being modified by the loop (this can only
occur for mutable sequences, e.g. lists). An internal counter is used to keep track
of which item ... ~emph{This can lead to nasty bugs} that can be avoided by making a
temporary copy using a slice of the whole sequence ...”
"""
    ub_sources = (
        "Null pointer dereference",
        "Double-free",
        "Use-after-free",
        "Out-of-bounds access",
        "Integer conversion",
        "Integer overflow",
        "Iterator invalidation",
        "Invalid alignment",
        "…",
    )

    # Opener: two large lines, the second one appears from step 2.
    opener = slides.new_slide()
    opener.set_style("text", s(size=60, bold=True))
    opener.box().text("Rust is safe...", style="text")
    opener.box(show="2+").text("...but from what?", style="text")

    # Undefined behaviour illustration.
    ub_body = slide_header(slides.new_slide(), "Undefined behaviour")
    ub_body.box(width=800).image("imgs/cpp-undefined.png")

    # Quote from the Java documentation.
    java_body = slide_header(slides.new_slide(), "UB in Java")
    java_body.box().text("Java::Iterator::remove")
    with_bg(java_body.box()).text(java_quote,
                                  style=s(size=28, align="left"))

    # Quote from the Python documentation.
    python_body = slide_header(slides.new_slide(), "UB in Python")
    python_body.box().text("~tt{for} statement")
    with_bg(python_body.box()).text(python_quote,
                                    style=s(size=24, align="left"))

    # Enumerated sources of UB, one bullet per step, then the punch line.
    sources_body = slide_header(slides.new_slide(), "Sources of UB")
    bullets = sources_body.box()
    for label in ub_sources:
        list_item(bullets, show="next+").text(label)

    sources_body.box(height=20)
    punch_line = sources_body.box(show="next+")
    punch_line.text("Rust tries very hard to avoid all of the above",
                    style=s(size=44, bold=True))
Esempio n. 3
0
def runtime(slides: Slides):
    """Append the single "Minimal runtime" slide with four bullets and an image."""
    runtime_slide = slides.new_slide()
    runtime_slide.update_style("default", s(size=40))
    bullets = slide_header(runtime_slide, "Minimal runtime").box()

    list_item(bullets, show="next+").text("No GC")
    # The image is placed right under the first bullet, using show="last+".
    gc_image = bullets.box(width=600, height=400, show="last+", padding=0)
    gc_image.image("imgs/gc.svg")

    for label in ("No exceptions",
                  "Tight data layout",
                  "Supports embedded platforms"):
        list_item(bullets, show="next+").text(label)
Esempio n. 4
0
def caveats(slides: Slides):
    """Append the "Performance caveats" slide: two bullets, each with a sub-item."""
    body = slide_header(slides.new_slide(), "Performance caveats")

    # Smaller text style shared by both level-1 sub-items.
    detail_style = s(size=26)

    bullets = body.box()

    list_item(bullets).text("Out-of-bounds checks")
    bounds_detail = list_item(bullets, level=1, show="2+")
    bounds_detail.text("Can be optimized away (iterators)",
                       style=detail_style)

    overflow_item = list_item(bullets, show="3+")
    overflow_item.text("Integer overflow is not undefined")
    overflow_detail = list_item(bullets, level=1, show="4+")
    overflow_detail.text("Runtime checks only in debug mode",
                         style=detail_style)
Esempio n. 5
0
def modules(slides: Slides):
    """Append the "Proper module system" slide.

    The slide has two side-by-side columns (``foo.rs`` and ``main.rs``), each
    with a caption and a Rust snippet, followed by a short list of advantages.
    """
    library_source = """
pub fn fun1() {
    println!("fun1");
}
fn fun2() {
    println!("fun2");
}
"""
    main_source = """
use foo;

fn main() {
    foo::fun1();
    // foo::fun2(); private
}"""

    module_slide = slides.new_slide()
    module_slide.update_style("code", s(size=24))
    module_slide.set_style("bold", s(bold=True))
    body = slide_header(module_slide, "Proper module system")

    columns = body.box(width="fill", horizontal=True)

    # Left column: caption (plain at step 1, overlaid by a bold variant
    # afterwards) and the library file.
    left = columns.box(width="50%", y=0)
    caption = left.box()
    caption.box(show="1").text("foo.rs")
    caption.overlay(show="next+").text("~bold{foo}.rs")
    code(left.box(), library_source)

    # Right column: appears from step 2 and shows the consumer of the module.
    right = columns.box(width="50%", y=0, show="2+")
    right.box().text("main.rs")
    code(right.box(), main_source)

    body.box(height=60)
    advantages = body.box(show="3+")
    advantages.update_style("default", s(size=40))
    for label in ("visibility control", "self-contained"):
        list_item(advantages).text(label)
Esempio n. 6
0
def concurrency_issues(slides: Slides):
    """Append the "Concurrency issues" and "What causes data races?" slides."""
    # --- Slide: what Rust does and does not prevent ----------------------
    issues = slide_header(slides.new_slide(), "Concurrency issues")

    issues.box().text("Rust doesn't prevent:")
    not_prevented = issues.box()
    for label in ("Deadlocks", "General race conditions"):
        list_item(not_prevented, show="next+").text(label)

    issues.box(height=20)
    issues.box(show="next+").text("Rust prevents (at compile time):")
    prevented = issues.box()
    list_item(prevented, show="next+").text("Data races")

    # --- Slide: cause of data races --------------------------------------
    causes = slide_header(slides.new_slide(), "What causes data races?")

    large = s(size=50)
    for line in ("Concurrent aliasing and mutability...",
                 "...but Rust already disables that!"):
        causes.box(show="next+").text(line, style=large)

    causes.box(height=20)
    question = causes.box(show="next+")
    question.text("So how do we get any concurrency at all...?", style=large)
Esempio n. 7
0
def borrowing(slides: Slides) -> None:
    """Append the slides that introduce borrowing.

    In order, the created slides are titled "Where's the aliasing?",
    "Borrowing", "Shared borrows", "Unique borrows", "Vector example (Rust)"
    and "What if compile time is not enough?".

    The function relies on helpers defined elsewhere in the file
    (``slide_header``, ``list_item``, ``code``, ``code_step``,
    ``pointer_to_line`` and ``s``). Statement order matters: most boxes use
    the relative step keywords ``"next+"`` / ``"last+"``.
    """
    # --- Slide: "Where's the aliasing?" -----------------------------------
    slide = slides.new_slide()
    content = slide_header(slide, "Where's the aliasing?")
    content.box().text("So far, we only have mutability, there's no aliasing:")

    content.box(height=20)
    list = content.box()
    list_item(list, show="next+").text(
        "After a move, the original value is not accessible")
    list_item(list, show="next+").text("After a copy, a new value is created")

    # --- Slide: "Borrowing" -----------------------------------------------
    slide = slides.new_slide()
    content = slide_header(slide, "Borrowing")
    content.box().text(
        "Aliasing happens when you create a reference to a value")
    content.box(show="next+").text("This is called borrowing in Rust")

    # --- Slide: "Shared borrows" ------------------------------------------
    slide = slides.new_slide()
    slide.update_style("code", s(size=38))
    content = slide_header(slide, "Shared borrows")

    code_width = 800

    code(content,
         """
let value = Bitmap::load(...);
let a = &value;
let b = &value;
""",
         width=code_width)

    content.box(height=20)
    list = content.box()
    list_item(
        list,
        show="next+").text("Multiple shared borrows of a value may exist")

    list.box(height=10)
    list_item(list,
              show="next+").text("You can't mutate using a shared borrow")
    list.box(height=10)
    # show="last+" is used so the snippet accompanies the bullet created just
    # above (relative step keyword — see the elsie documentation).
    code(list.box(show="last+"),
         "a.width = 10; // does not compile",
         width=code_width)

    list.box(height=10)
    list_item(list, show="next+").text("You can't move out of a shared borrow")
    list.box(height=10)
    code(list.box(show="last+"),
         """
fn foo(bitmap: Bitmap) { }
foo(a); // does not compile""",
         width=code_width)

    # --- Slide: "Unique borrows" ------------------------------------------
    slide = slides.new_slide()
    slide.update_style("code", s(size=38))
    content = slide_header(slide, "Unique borrows")

    code_width = 800

    code(content,
         """
let value = Bitmap::load(...);
let c = &mut value;
""",
         width=code_width)

    content.box(height=20)
    list = content.box()
    list_item(list, show="next+").text(
        """If a unique borrow exists, there are no other references
to the same value""",
        style=s(align="left"))
    list_item(list, show="next+").text(
        "You can only create a unique borrow if you own the value")

    list.box(height=10)
    list_item(list, show="next+").text("You can mutate using a unique borrow")
    list.box(height=10)
    code(list.box(show="last+"), "c.width = 10;", width=code_width)

    list.box(height=10)
    list_item(list, show="next+").text("You can't move out of a unique borrow")
    list.box(height=10)
    code(list.box(show="last+"),
         """
fn foo(bitmap: Bitmap) { }
foo(c); // does not compile""",
         width=code_width)

    # --- Slide: "Vector example (Rust)" -----------------------------------
    slide = slides.new_slide()
    slide.update_style("code", s(size=38))
    content = slide_header(slide, "Vector example (Rust)")

    code_width = 800
    # Signature of Vec::push; only displayed from step 3 onwards.
    code(content.box(show="3+"),
         """
// Vec::push
fn push(&mut self, value: T)
""",
         width=code_width)
    content.box(height=10)
    # NOTE(review): each inner tuple presumably describes one step, listing
    # which snippet lines are visible (None = hidden) — confirm against
    # code_step, which is defined outside this block.
    code_step(content.box(width=code_width, height=300),
              """
let vec = vec!(1, 2, 3);
let p = &vec[0];
vec.push(4);
println!("{}", p);
""",
              "1", ((0, None, None, None), (0, 1, None, None), (0, 1, 2, None),
                    (0, 1, 2, 3)),
              width=code_width)

    content.box(height=10)
    content.box(height=220, show="next+").image("imgs/borrowck-error.png")

    # --- Slide: "What if compile time is not enough?" ---------------------
    slide = slides.new_slide()
    slide.update_style("code", s(size=34))
    content = slide_header(slide, "What if compile time is not enough?")
    content.box().text(
        """If you can't prove to the compiler that your borrows are safe,
borrow checking can be done at runtime.""")
    content.box(
        show="next+").text("If any rules are broken, the program panics.")

    content.box(height=20)
    code_box = code(
        content.box(show="next+"), """
let value = RefCell::new(5);
let a = value.borrow();     // shared borrow
let b = value.borrow_mut(); // unique borrow""")
    # Orange annotation attached to the snippet via pointer_to_line, passed
    # the show value "4+".
    # NOTE(review): the positional arguments (2, 100, 600) are presumably a
    # line index and geometry — confirm against pointer_to_line.
    pointer_to_line(slide,
                    code_box,
                    2,
                    100,
                    600,
                    "4+",
                    textbox_pos=("40%", 0),
                    code_pos=("40%", "100%")).text(
                        """This would panic, since there already is
a shared borrow""",
                        style=s(color="orange", align="left"))
Esempio n. 8
0
def ownership(slides: Slides) -> None:
    """Append the slides explaining ownership, moves and ``Copy`` types.

    The slide titles, in creation order, are: "Memory safety using the type
    system", "Ownership" (three slides, two of them produced by the nested
    ``person_slide`` helper), "Ownership - move semantics" (twice),
    "Constructors", "Why are they needed in C++?" (twice) and
    '"Copy" semantics' (twice).

    Relies on helpers defined elsewhere in the file (``slide_header``,
    ``list_item``, ``code``, ``pointer_to_line`` and ``s``).
    """
    # --- Slide: overview of the three concepts ----------------------------
    slide = slides.new_slide()
    content = slide_header(slide, "Memory safety using the type system")
    list = content.box()
    list_item(list).text("Ownership")
    list_item(list, show="next+").text("Borrowing")
    list_item(list, show="next+").text("Lifetimes")

    # --- Slide: the ownership rule ----------------------------------------
    slide = slides.new_slide()
    content = slide_header(slide, "Ownership")
    content.box().text("Every value in Rust has exactly one owner", s(size=50))
    content.box(height=10)
    content.box(show="next+").text(
        "When that owner goes out of scope, the value is dropped",
        style=s(size=36))

    def person_slide(end=""):
        """Create an "Ownership" slide showing ``fn foo`` and return it.

        ``end`` is appended right after the closing brace of the snippet.
        Returns a ``(slide, code_box)`` tuple, where ``code_box`` is whatever
        ``code`` returns.
        """
        slide = slides.new_slide()
        slide.update_style("code", s(size=50))
        content = slide_header(slide, "Ownership")
        # Doubled braces are str.format escapes for literal "{" and "}";
        # the trailing "{}" is the placeholder filled with ``end``.
        return (slide,
                code(content,
                     """
fn foo(bitmap: Bitmap) {{
    ...
}}{}""".format(end),
                     width=840))

    # First variant: plain function plus an orange annotation.
    (slide, box) = person_slide()
    # NOTE(review): positional arguments (0, 100, 150) are presumably a line
    # index and geometry — confirm against pointer_to_line.
    pointer_to_line(slide,
                    box,
                    0,
                    100,
                    150,
                    "2+",
                    textbox_pos=("50%", "100%"),
                    code_pos=("40%", "10%")).text(
                        """No one else has any access to `bitmap`.
It can be mutated arbitrarily.""",
                        style=s(color="orange", size=40))

    # Second variant: same snippet with a trailing comment; the returned
    # tuple is not needed here.
    person_slide(" // bitmap is dropped here")

    # --- Slide: move semantics (annotated call site) ----------------------
    slide = slides.new_slide()
    slide.update_style("code", s(size=38))
    content = slide_header(slide, "Ownership - move semantics")
    code_box = code(
        content.box(), """
fn foo(bitmap: Bitmap) { ... }

fn main() {
    let bitmap = Bitmap::load(...);
    foo(bitmap);
    ...
}
""")
    pointer_to_line(slide,
                    code_box,
                    4,
                    200,
                    120,
                    "2+",
                    textbox_pos=("40%", "100%"),
                    code_pos=("46%", "60%")).text("""`bitmap` is moved here.
It will not be `dropped` in the current scope.
""",
                                                  style=s(color="orange",
                                                          align="left"))

    # --- Slide: move semantics (use after move + compiler error image) ----
    slide = slides.new_slide()
    slide.update_style("code", s(size=38))
    content = slide_header(slide, "Ownership - move semantics")
    code(
        content.box(), """
fn foo(bitmap: Bitmap) { ... }

fn main() {
    let bitmap = Bitmap::load(...);
    foo(bitmap);
    println!("{}", bitmap.width);
}
""")
    content.box(height=20)
    content.box(height=180, show="2+").image("imgs/ownership-moved.png")

    # --- Slide: constructors ----------------------------------------------
    slide = slides.new_slide()
    content = slide_header(slide, "Constructors")
    list = content.box()
    list_item(list).text("Move constructors? Nope.")
    list_item(list, show="next+").text("Move assignment constructors? Nope.")

    # --- Slide: C++ motivation (image) ------------------------------------
    slide = slides.new_slide()
    content = slide_header(slide, "Why are they needed in C++?")
    content.box(height=600).image("imgs/meme-lvalue.jpg")

    # --- Slide: C++ motivation (code) -------------------------------------
    slide = slides.new_slide()
    slide.update_style("code", s(size=38))
    content = slide_header(slide, "Why are they needed in C++?")
    # The third positional argument "cpp" is passed through to ``code``
    # (presumably the snippet language — confirm against the helper).
    box = code(
        content, """
void foo(Bitmap&& bitmap) { ... }

Bitmap bitmap(...);
foo(std::move(bitmap));
std::cout << bitmap.width << std::endl;""", "cpp")
    pointer_to_line(slide,
                    box,
                    4,
                    100,
                    600,
                    "2+",
                    textbox_pos=("40%", "0"),
                    code_pos=("40%", "100%")).text(
                        """`bitmap` is still accessible here.
It will be `dropped` at the end of scope.
Its state HAD to be reset in the move constructor.""",
                        style=s(color="orange", align="left"))

    # --- Slide: Copy semantics (rules) ------------------------------------
    slide = slides.new_slide()
    slide.update_style("code", s(size=38))
    content = slide_header(slide, '"Copy" semantics')
    content.box().text("""Values are copied instead of moved
if they implement the `Copy` trait""",
                       style=s(bold=True))

    content.box(height=20)
    content.box(show="next+").text("Types are `Copy` if:")
    list = content.box()
    list_item(list,
              show="next+").text("they are primitive (integers, floats, etc.)")
    list_item(list, show="next+").text("they are marked as Copy")

    content.box(height=20)
    code(content.box(show="next+"),
         """
#[derive(Copy)]
struct Person {
    age: u32,
    male: bool
}""",
         width=500)

    # --- Slide: Copy semantics (example) ----------------------------------
    slide = slides.new_slide()
    slide.update_style("code", s(size=38))
    content = slide_header(slide, '"Copy" semantics')
    box = code(
        content.box(), """
fn foo(num: u32) { ... }

let number = 5;
foo(number);
println!("{}", number); // no error""")

    pointer_to_line(slide,
                    box,
                    3,
                    200,
                    600,
                    "2+",
                    textbox_pos=("40%", "0"),
                    code_pos=("34%", "60%")).text("""`number` is copied here.
    It can be still accessed after the call.""",
                                                  style=s(color="orange"))
Esempio n. 9
0
def outro(slides: SlideDeck):
    """Append the closing slides: a list of further effects and a thank-you page."""
    further_effects = (
        "NUMA",
        "4k aliasing",
        "Misaligned accesses, cache line boundaries",
        "Instruction data dependencies",
        "Software prefetching",
        "Non-temporal stores & cache pollution",
        "Bandwidth saturation",
        "DRAM refresh intervals",
        "AVX/SSE transition penalty",
        "...",
    )

    # Slide listing effects that the talk did not cover.
    effects_body = slide_header(new_slide(slides),
                                "There are many other effects")
    bullets = effects_body.box()
    for label in further_effects:
        list_item(bullets).text(label)

    # Final slide: thanks, repository link, author and tooling credit.
    closing = new_slide(slides)
    closing.box().text("Thank you!", s(bold=True, size=60))

    repo_box = closing.box(p_top=60)
    repo_box.text(
        """For more examples visit:
~tt{github.com/kobzol/hardware-effects}""", s(size=44))

    closing.box(p_top=80).text("Jakub Beránek")

    credit_box = closing.box(p_top=100)
    credit_box.text("Slides built with ~tt{github.com/spirali/elsie}",
                    s(size=30))
Esempio n. 10
0
def shared_state(slides: Slides) -> None:
    """Append the slides about sharing state between threads.

    The first slide is titled "Spawning a thread"; every following slide is
    titled "Shared state concurrency" and walks through successive attempts
    (plain reference, ``Rc``, ``Arc``, ``Arc`` + ``clone``, ``Arc<Mutex>``).

    Relies on helpers defined elsewhere in the file (``slide_header``,
    ``list_item``, ``code``, ``code_step``, ``with_border`` and ``s``).
    When ``slide_header`` is called with a third argument ``True`` its result
    is unpacked as ``(content, header)``.
    """
    # --- Slide: "Spawning a thread" ---------------------------------------
    slide = slides.new_slide()
    slide.update_style("code", s(size=38))
    content = slide_header(slide, "Spawning a thread")

    code(content.box(), "fn spawn<F: Fn + Send>(f: F)")

    content.box(height=20)
    content.box(show="next+").text("""Ownership of T can be transferred to another thread
only if T implements the ~emph{Send} trait""")

    content.box(height=20)
    content.box(show="next+").text("""Send is implemented automatically, unless the type
contains values that are not safe to be transferred between threads""", style=s(size=30))

    # --- Slide: the goal --------------------------------------------------
    slide = slides.new_slide()
    slide.update_style("code", s(size=34))
    content = slide_header(slide, "Shared state concurrency")
    content.box().text("Goal:", style=s(bold=True))
    list = content.box()
    list_item(list, show="next+").text("Spawn a thread")
    list_item(list, show="next+").text("Send a reference to some value to it")
    list_item(list, show="next+").text("Modify the value in the spawned thread")
    list_item(list, show="next+").text("Read the value in the original thread")

    # --- Slide: attempt with a plain reference ----------------------------
    slide = slides.new_slide()
    slide.update_style("code", s(size=34))
    (content, header) = slide_header(slide, "Shared state concurrency", True)
    # Image in the header; a second image is overlaid on it at step 4 only.
    box = header.box(width=160, y=80)
    box.image("imgs/meme-face-1.jpg")
    box.overlay(show="4").image("imgs/meme-face-2.jpg")

    # NOTE(review): each tuple presumably describes one step, listing which
    # snippet lines are visible (None = hidden) — confirm against code_step.
    code_step(content.box(width=800, height=350), """
let value = 5;
let p = &value;
thread::spawn(|| {
    println!("{}", *p);
});
""", 1, [(0, None, None, None, None),
         (0, 1, None, None, None),
         (0, 1, 2, 3, 4)], width=500)

    content.box(height=10)
    with_border(content, show="4+").box(height=220).image("imgs/concurrent-error-1.png")

    # --- Slide: attempt with Rc -------------------------------------------
    slide = slides.new_slide()
    slide.update_style("code", s(size=34))
    (content, header) = slide_header(slide, "Shared state concurrency", True)
    box = header.box(width=160, y=80)
    box.image("imgs/meme-face-2.jpg")
    box.overlay(show="4-5").image("imgs/meme-face-3.png")
    box.overlay(show="6+").image("imgs/meme-face-4.png")

    # The last tuple contains a string in place of an index.
    # NOTE(review): this presumably substitutes that text for the line
    # ("move" closure) — confirm against code_step.
    code_step(content.box(width=800, height=350), """
let p = Rc::new(5);
thread::spawn(|| {
    println!("{}", *p);
});
""", 1, [(0, None, None, None),
         (0, 1, None, None),
         (0, 1, 2, 3),
         (0, 1, 2, 3),
         (0, "thread::spawn(move || {", 2, 3)], width=500)

    # Two bordered overlays share one area: one at step 4, one from step 6.
    border_box = content.box(width=1000, height=220)
    box = with_border(border_box.overlay(), show="4").box(width=800, height=180)
    box.box(show="4", height=220).image("imgs/concurrent-error-2.png")
    box = with_border(border_box.overlay(), show="6+").box(width=800, height=180)
    box.box(show="6+", width=900).image("imgs/concurrent-error-3.png")

    # --- Slide: attempt with Arc (moved into the closure) -----------------
    slide = slides.new_slide()
    slide.update_style("code", s(size=34))
    (content, header) = slide_header(slide, "Shared state concurrency", True)
    box = header.box(width=160, y=80)
    box.overlay(show="1-2").image("imgs/meme-face-6.jpg")
    box.overlay(show="3+").image("imgs/meme-face-5.jpg")

    content.box(height=60)
    code_step(content.box(width=800, height=260), """
let p = Arc::new(5);
thread::spawn(move || {
    println!("{}", *p);
});
println!("{}", *p);
""", 1, [(0, 1, 2, 3, None),
         (0, 1, 2, 3, 4),
         ], width=500)
    with_border(content, show="3+").box(width=1000).image(
        "imgs/concurrent-error-4.png")

    # --- Slide: Arc + clone -----------------------------------------------
    slide = slides.new_slide()
    slide.update_style("code", s(size=34))
    (content, header) = slide_header(slide, "Shared state concurrency", True)
    box = header.box(width=160, y=80, show="3+")
    box.image("imgs/meme-face-7.png")

    code_width = 800
    code(content.box(show="2+"), "fn clone(&self) -> Arc<T>;", width=code_width)

    content.box(height=20)
    code(content.box(), """
let p = Arc::new(5);
let tp = p.clone();
thread::spawn(move || {
    println!("{}", *tp);
});
println!("{}", *p);
""", width=code_width)

    content.box(height=10)
    content.box(show="2+").text("""Clone() creates a new Arc.
Multiple variables remove aliasing.""")
    content.box(show="3+").text("Arc only provides ~emph{read-only} access (shared borrow).")

    # --- Slide: Arc<Mutex> ------------------------------------------------
    slide = slides.new_slide()
    slide.update_style("code", s(size=34))
    # "code2" is derived from the slide's "code" style with size 32 and is
    # used for the Mutex::lock signature below.
    slide.set_style("code2", slide.get_style("code").compose(s(size=32)))
    content = slide_header(slide, "Shared state concurrency")

    code_width = 800
    code(content.box(show="4+"), """
// Mutex::lock
fn lock(&self) -> &mut T;""", code_style="code2", width=code_width)

    content.box(height=20)
    # The first step tuple uses a space-padded string in place of line 0.
    # NOTE(review): presumably this shows only the `Mutex::new(5)` part at
    # its original column — confirm against code_step.
    code_step(content.box(width=code_width, height=320), """
let p = Arc::new(Mutex::new(5));
let tp = p.clone();
thread::spawn(move || {
    *tp.lock() = 10;
});
println!("{}", *p.lock());""", "1", (
        ("                 Mutex::new(5)  ", None, None, None, None, None),
        (0, None, None, None, None, None),
        (0, 1, 2, 3, 4, 5)
    ), width=code_width)
Esempio n. 11
0
def unsafe(slides: Slides) -> None:
    """Append the slides about interior mutability and unsafe Rust.

    Slide titles, in creation order: "Where's the catch?", "Enter unsafe
    Rust", "Unsafe Rust", four more "Unsafe Rust" slides produced by the
    nested ``unsafe_slide`` helper, "Finding unsafe code - C++",
    "Finding unsafe code - Rust", and an untitled closing statement.

    Relies on helpers defined elsewhere in the file (``slide_header``,
    ``list_item``, ``code``, ``code_step``, ``bash``, ``s``) and on the
    module-level ``CODE_HIGHLIGHT_COLOR``.
    """
    # --- Slide: "Where's the catch?" --------------------------------------
    slide = slides.new_slide()
    slide.update_style("code", s(size=32))
    content = slide_header(slide, "Where's the catch?")
    content.box().text("We have seen things that mutate through a shared borrow")

    code_width = 800
    # NOTE(review): each tuple presumably describes one step, listing which
    # snippet lines are visible (None = hidden); the third argument (2) is
    # passed positionally — confirm both against code_step.
    code_step(content.box(width=code_width, height=400), """
// Arc::clone
fn clone(&self) -> Arc<T>;
// Mutex::lock
fn lock(&self) -> &mut T;
// AtomicU64::store
fn store(&self, val: u64, order: Ordering);
""", 2, [
        (0, 1, None, None, None, None),
        (0, 1, 2, 3, None, None),
        (0, 1, 2, 3, 4, 5)
    ], width=code_width)

    content.box(show="5+").text("This is called ~tt{interior mutability} and requires unsafe Rust",
                                s(size=32))

    # --- Slide: "Enter unsafe Rust" ---------------------------------------
    slide = slides.new_slide()
    content = slide_header(slide, "Enter unsafe Rust")
    content.box().text("Some scenarios are not expressible in (safe) Rust")

    content.box(height=20)
    content.box(show="next+").text("In some cases, something more is required to:")

    list = content.box()
    list_item(list, show="next+").text("Express inherently unsafe paradigms")
    list_item(list, show="next+").text("Improve performance")
    list_item(list, show="next+").text("Interact with I/O, OS, hardware, network")

    # --- Slide: "Unsafe Rust" (introduction) ------------------------------
    slide = slides.new_slide()
    content = slide_header(slide, "Unsafe Rust")

    content.box().text("You can mark parts of code with the ~tt{unsafe} keyword")
    content.box(show="next+").text("Unsafe Rust is a ~emph{superset} of Rust")

    def unsafe_slide(header, code_body, content_show="1", code_size=36):
        """Create one "Unsafe Rust" slide with a caption and a code snippet.

        ``header`` is the caption text, ``code_body`` the snippet passed to
        ``code``, ``content_show`` the ``show`` value applied to both the
        caption and the snippet, and ``code_size`` the size used for the
        slide's "code" style.
        """
        slide = slides.new_slide()
        slide.update_style("code", s(size=code_size))
        content = slide_header(slide, "Unsafe Rust")
        content.box(y=0).text("Unsafe Rust allows:")

        content.box(height=20)
        content.box(show=content_show).text(header)
        content.box(height=10)
        code(content.box(show=content_show), code_body)

    # Only the first capability slide passes show="2+"; the others use the
    # helper's default content_show="1".
    unsafe_slide("Accessing a global mutable variable", """
static mut COUNTER: u32 = 0;

fn increment_count() {
    unsafe {
        COUNTER += 1;
    }
}""", content_show="2+")
    unsafe_slide("Dereferencing a raw pointer", """
let ptr = 0xCAFECAFE as *mut u32;
unsafe {
    *ptr = 5;
}""")
    unsafe_slide("Calling an unsafe function", """
unsafe {
    zlib_compress(&buffer, buffer.len());
}""")
    unsafe_slide("Implementing an unsafe trait", """
unsafe impl Send for MySuperSafeType {
    ...
}""")

    # --- Slide: "Finding unsafe code - C++" -------------------------------
    slide = slides.new_slide()
    slide.update_style("code", s(size=18))
    content = slide_header(slide, "Finding unsafe code - C++")
    code_box = code(content, """
std::atomic<LifecycleId> ArenaImpl::lifecycle_id_generator_;
GOOGLE_THREAD_LOCAL ArenaImpl::ThreadCache ArenaImpl::thread_cache_ = {-1, NULL};

void ArenaImpl::Init() {
  lifecycle_id_ =
      lifecycle_id_generator_.fetch_add(1, std::memory_order_relaxed);
  hint_.store(nullptr, std::memory_order_relaxed);
  threads_.store(nullptr, std::memory_order_relaxed);

  if (initial_block_) {
    // Thread which calls Init() owns the first block. This allows the
    // single-threaded case to allocate on the first block without having to
    // perform atomic operations.
    new (initial_block_) Block(options_.initial_block_size, NULL);
    SerialArena* serial =
        SerialArena::New(initial_block_, &thread_cache(), this);
    serial->set_next(NULL);
    threads_.store(serial, std::memory_order_relaxed);
    space_allocated_.store(options_.initial_block_size,
                           std::memory_order_relaxed);
    CacheSerialArena(serial);
  } else {
    space_allocated_.store(0, std::memory_order_relaxed);
  }
}
""")
    # From step 2 a highlight-coloured rectangle is overlaid on the whole
    # code box.
    code_box.overlay(show="2+", z_level=99).rect(bg_color=CODE_HIGHLIGHT_COLOR)

    # --- Slide: "Finding unsafe code - Rust" ------------------------------
    slide = slides.new_slide()
    content = slide_header(slide, "Finding unsafe code - Rust")
    bash(content.box(), '$ grep "unsafe" main.rs', text_style=s(size=40))

    # --- Slide: closing statement (no header) -----------------------------
    slide = slides.new_slide()
    slide.box().text("""Rust builds safe abstractions
on top of unsafe foundations""", s(size=50))
Esempio n. 12
0
def design(slides: Slides):
    """Append the slides about Rust's governance and stability model.

    Creates three slides titled "Design by community", "Backwards
    compatibility" and "Unstable features".
    """
    feature_gate_snippet = """
#![feature(async_await)]
async fn foo() {
    ...
}"""

    # --- Slide: community-driven design -----------------------------------
    community = slide_header(slides.new_slide(), "Design by community")
    community_list = community.box()
    for label in ("Open source", "RFC"):
        list_item(community_list).text(label)
    rfc_image = community_list.box(width=700, height=200, p_top=40)
    rfc_image.image("imgs/rust-rfc.png")

    # --- Slide: backwards compatibility -----------------------------------
    compat = slide_header(slides.new_slide(), "Backwards compatibility")

    fine_print = s(size=28)

    compat_list = compat.box()
    list_item(compat_list).text("Strong BC guarantees")
    # Each headline shares its step with the level-1 detail right below it.
    paired_points = (
        ("2+", "New version every 6 weeks",
         "Thousands of libraries tested to spot regressions"),
        ("3+", "Big changes => new edition", "Rust 2015 vs Rust 2018"),
    )
    for step, headline, detail in paired_points:
        list_item(compat_list, show=step).text(headline)
        list_item(compat_list, show=step, level=1).text(detail,
                                                        style=fine_print)

    # --- Slide: unstable features -----------------------------------------
    unstable_slide = slides.new_slide()
    unstable_slide.update_style("code", s(size=38))
    unstable = slide_header(unstable_slide, "Unstable features")

    code(unstable.box(), feature_gate_snippet)

    unstable.box(height=20)
    bash(unstable.box(show="2+"), "$ cargo +nightly build")
Esempio n. 13
0
def hw_complexity(slides: SlideDeck):
    """Append the introductory slides motivating the study of hardware effects.

    Slide titles, in creation order: "Why should we care?", "C++ abstract
    machine example", "Hardware effects", "Hardware is getting more and more
    complex", "Microarchitecture (Haswell)", "How bad is it?", "Plan of
    attack", and an untitled transition slide.
    """
    increment_snippet = """void foo(int* arr, int count)
{
    for (int i = 0; i < count; i++)
    {
        arr[i]++;
    }
}"""
    manual_links = """http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/n4659.pdf
https://software.intel.com/sites/default/files/managed/39/c5/325462-sdm-vol-1-2abcd-3abcd.pdf
https://software.intel.com/sites/default/files/managed/9e/bc/64-ia-32-architectures-optimization-manual.pdf"""
    # (list_item keyword arguments, bullet text) for the "Plan of attack" slide.
    plan_rows = (
        ({}, "Show example C++ programs"),
        ({"level": 1, "show": "next+"}, "short, (hopefully) comprehensible"),
        ({"level": 1, "show": "next+"}, "compiled with ~tt{-O3}"),
        ({"show": "next+"}, "Demonstrate weird performance behaviour"),
        ({"show": "next+"}, "Let you guess what might cause it"),
        ({"show": "next+"}, "Explain (a possible) cause"),
        ({"show": "next+"}, "Show how to measure and fix it"),
        ({"show": "next+", "p_top": 20},
         "Disclaimer #1: Everything will be Intel x86 specific"),
        ({"show": "next+"},
         "Disclaimer #2: I'm not an expert on this and I may be wrong :-)"),
    )

    # --- Slide: motivation ------------------------------------------------
    why = slide_header(new_slide(slides), "Why should we care?")
    why_list = why.box()
    list_item(why_list).text("We write code for the C++ abstract machine")
    contract_item = list_item(why_list, show="next+")
    contract_item.text(
        "Intel CPUs fulfill the contract of this abstract machine")
    inside_item = list_item(why_list, level=1, show="next+")
    inside_item.text("But inside they can do whatever they want")
    tradeoff_item = list_item(why_list, show="next+")
    tradeoff_item.text(
        "Understanding CPU trade-offs can get us more performance")

    # --- Slide: abstract machine example ----------------------------------
    example_slide = new_slide(slides)
    example_slide.update_style("code", s(size=50))
    example = slide_header(example_slide, "C++ abstract machine example")
    code(example.box(), increment_snippet)
    question = example.box(p_top=20)
    question.text("How fast are the individual array increments?", s(size=40))

    # --- Slide: definition of hardware effects ----------------------------
    effects = slide_header(new_slide(slides), "Hardware effects")
    effects_list = effects.box()
    list_item(effects_list).text(
        "Performance effects caused by a specific CPU/memory implementation")
    for label in ("Demonstrate some CPU/memory trade-off or assumption",
                  "Impossible to predict from (C++) code alone"):
        list_item(effects_list, show="next+").text(label)

    # --- Slide: growing hardware complexity -------------------------------
    moore = slide_header(new_slide(slides),
                         "Hardware is getting more and more complex")
    moore.box(height=600).image("images/moores-law.png")
    moore.box().text("Source: karlrupp.net", s(size=30))

    # --- Slide: Haswell microarchitecture ---------------------------------
    haswell = slide_header(new_slide(slides), "Microarchitecture (Haswell)")
    haswell.box(height=600).image("images/haswell-diagram.svg")
    # Heuristics, assumptions, fast paths/slow paths
    haswell.box().text(
        "Source: Intel Architectures Optimization Reference Manual",
        s(size=30))

    # --- Slide: size of the specifications --------------------------------
    how_bad = slide_header(new_slide(slides), "How bad is it?")
    spec_list = how_bad.box()
    cpp_row = list_item(spec_list).box(horizontal=True)
    cpp_row.box().text("C++ 17 final draft: ")
    cpp_row.box(show="next+").text(" 1622 pages")
    intel_row = list_item(spec_list, show="next+").box(horizontal=True)
    intel_row.box().text("Intel x86 manual: ")
    intel_row.box(show="next+").text(" ~bold{5764} pages!")
    how_bad.box(y=580).text(manual_links, s(size=14, align="left"))

    # --- Slide: plan of attack --------------------------------------------
    plan = slide_header(new_slide(slides), "Plan of attack")
    plan_list = plan.box()
    for item_kwargs, label in plan_rows:
        list_item(plan_list, **item_kwargs).text(label)

    # --- Slide: transition to the examples --------------------------------
    transition = new_slide(slides)
    transition.box().text("""Let's see some examples...""",
                          s(bold=True, size=40))
Esempio n. 14
0
def branch_prediction(slides: SlideDeck, backup: bool):
    """Append the branch-prediction section of the talk to ``slides``.

    The section contains the motivating screenshots and perf output, an
    animated walk-through of a "repeat the previous outcome" predictor on an
    unsorted and on a sorted array, measurement and mitigation hints, and a
    short list about branch target prediction.

    Args:
        slides: Deck that receives the new slides.
        backup: When true, the extra "(backup)" slides (code listings, result
            images, perf counter output) are emitted as well.
    """
    if backup:
        slide = new_slide(slides)
        content = slide_header(slide, "Code (backup)")
        code(
            content.box(),
            """std::vector<float> data = /* 32K random floats in [1, 10] */;
float sum = 0;
// std::sort(data.begin(), data.end());
for (auto x : data)
{
    if (x < 6.0f)
    {
        sum += x;
    }
}""")
        slide = new_slide(slides)
        content = slide_header(slide, "Result (backup)")
        content.box(width="90%").image("images/example0a-time.png")

    slide = new_slide(slides)
    content = slide_header(slide, "Most upvoted Stack Overflow question")
    content.box(height=600).image("images/stack-overflow.png")

    slide = new_slide(slides)
    content = slide_header(slide,
                           "What is going on? (Intel Amplifier - VTune)")
    content.box(height=600).image("images/vtune.png")

    slide = new_slide(slides)
    content = slide_header(slide, "What is going on? (perf)")
    bash(content.box(),
         """$ perf stat ./example0a --benchmark_filter=nosort
    853,672012  task-clock (msec) #   0,997 CPUs utilized
            30  context-switches  #   0,035 K/sec          
             0  cpu-migrations    #   0,000 K/sec
           199  page-faults       #   0,233 K/sec
 3 159 530 915  cycles            #   3,701 GHz
 1 475 799 619  instructions      #   0,47  insn per cycle
   419 608 357  branches          # 491,533 M/sec
   102 425 035  branch-misses     #  24,41% of all branches""",
         text_style=s(align="left", size=34))

    slide = new_slide(slides)
    content = slide_header(slide, "Branch predictor")
    content.box(height=600).image("images/haswell-diagram.png")
    content.box(width=130, height=40, x=595, y=140).rect(color=COLOR_FRONTEND,
                                                         stroke_width=3)

    slide = new_slide(slides)
    content = slide_header(slide, "CPU pipeline 101")
    content.box(height=400).image("images/branch-miss-pipeline.svg")

    slide = new_slide(slides)
    content = slide_header(slide, "Branch predictor")
    list_wrapper = content.box()
    list_item(list_wrapper).text("CPU tries to predict results of branches")
    # "~" is replaced as the escape character so that "~15-20" is rendered
    # literally instead of being parsed as inline style markup.
    list_item(list_wrapper,
              show="next+").text("Misprediction can cost ~15-20 cycles!",
                                 escape_char="#")

    # --- Animated "simple branch predictor" slides --------------------------
    # Both slides share one threshold, one layout and one animation scheme;
    # only the title and the input array differ.
    needle = 6
    box_dimension = 100
    text_style = s(align="left", size=42)
    width = 400

    def predict(values):
        """Return the per-element guesses of a "same as last time" predictor.

        The first guess is "not taken" (False); every following guess repeats
        whether the previous element satisfied ``value < needle``.
        """
        predictions = []
        last_taken = False
        for value in values:
            predictions.append(last_taken)
            last_taken = value < needle
        return predictions

    def hit_summary(values, predictions):
        """Format the hit/miss caption derived from the actual outcomes."""
        hits = sum(1 for value, taken in zip(values, predictions)
                   if taken == (value < needle))
        misses = len(values) - hits
        rate = 100 * hits // len(values) if values else 0
        return "{} hits, {} misses ({}% hit rate)".format(hits, misses, rate)

    def array(row, numbers, predictions, start=1):
        """Draw ``numbers`` as a row of cells animated two steps per element.

        For element ``i`` the question "<n> < <needle>?" is visible during
        steps ``start + 2*i`` and ``start + 2*i + 1``; from the latter step on
        the cell is covered by a green (correct guess) or red (wrong guess)
        overlay.
        """
        stroke_width = 2
        size = 36
        for i, (value, predicted_taken) in enumerate(zip(numbers,
                                                         predictions)):
            box = row.box(width=box_dimension,
                          height=box_dimension).rect(color="black",
                                                     stroke_width=stroke_width)
            number = str(value)
            box.text(number, s(bold=True, size=size))

            predicted_correctly = predicted_taken == (value < needle)
            verdict_color = "green" if predicted_correctly else "red"
            first_step = start + i * 2
            overlay = box.overlay(show="{}+".format(first_step + 1)).rect(
                color="black", bg_color=verdict_color,
                stroke_width=stroke_width)
            overlay.text(number, s(color="white", bold=True, size=size))
            # The question is placed directly under the cell it refers to.
            row.box(x=i * box_dimension,
                    y=box_dimension,
                    width=box_dimension,
                    show="{}-{}".format(first_step,
                                        first_step + 1)).text(
                                            "{} < {}?".format(number, needle))

    def predict_sequence(wrapper: Box, predictions, start=1):
        """Overlay ``wrapper`` with the current guess, two steps per guess."""
        for i, taken in enumerate(predictions):
            guess = "Taken" if taken else "Not taken"
            first_step = start + i * 2
            wrapper.overlay(
                show="{}-{}".format(first_step, first_step + 1)).rect(
                    bg_color="white").text("Prediction: {}".format(guess),
                                           style=text_style)

    def predictor_slide(title, values):
        """Emit one animated predictor slide for the given input array."""
        slide = new_slide(slides)
        content = slide_header(slide, title)
        # The listing shows the same threshold that drives the animation.
        code(content.box(p_bottom=40),
             "if (data[i] < %d) {\n    ...\n}" % needle)

        row = content.box(horizontal=True)
        predictions = predict(values)

        # The cells start one step later than the prediction label, so each
        # guess is shown before the matching comparison appears.
        array(row, values, predictions, start=2)
        prediction_wrapper = content.box(
            p_top=60, width=width).text("Prediction: Not taken",
                                        style=text_style)
        predict_sequence(prediction_wrapper, predictions, start=1)

        content.box(show="next+",
                    p_top=40).text(hit_summary(values, predictions))

    predictor_slide("Simple branch predictor - unsorted array",
                    [6, 2, 1, 7, 4, 8, 3, 9])
    predictor_slide("Simple branch predictor - sorted array",
                    [1, 2, 3, 4, 6, 7, 8, 9])

    if backup:
        size = 40
        slide = new_slide(slides)
        content = slide_header(slide, "How can the compiler help?")
        row = content.box(horizontal=True)
        row.box(y=0, p_right=50, width=400).image("images/bm-float-code.png")
        row.box(y=0, width=600).image("images/bm-float-bin.png")
        content.box(p_top=20).text(
            "With ~tt{float}, there are two branches per iteration",
            style=s(size=size))

        slide = new_slide(slides)
        content = slide_header(slide, "How can the compiler help?")
        row = content.box(horizontal=True)
        row.box(y=0, p_right=50, width=400).image("images/bm-int-code.png")
        row.box(y=0, width=600).image("images/bm-int-bin.png")
        content.box(p_top=20).text(
            "With ~tt{int}, one branch is removed (using ~tt{cmov})",
            style=s(size=size))

    slide = new_slide(slides)
    content = slide_header(slide, "How to measure?")
    content.box().text("~tt{branch-misses}", style=s(size=48))
    content.box(p_top=20).text("How many times was a branch mispredicted?")

    if backup:
        bash(content.box(p_top=40, show="next+"),
             """$ perf stat -e branch-misses ./example0a
with    sort ->     383 902
without sort -> 101 652 009""",
             text_style=s(align="left"))

    slide = new_slide(slides)
    slide.update_style("code", s(size=40))
    content = slide_header(slide, "How to help the branch predictor?")
    list_wrapper = content.box()
    list_item(list_wrapper).text("More predictable data")
    list_item(list_wrapper, show="next+").text("Profile-guided optimization")
    list_item(list_wrapper,
              show="next+").text("Remove (unpredictable) branches")
    list_item(list_wrapper,
              show="next+").text("Compiler hints (use with caution)")
    # The example is revealed together with the "Compiler hints" bullet.
    code(
        list_wrapper.box(show="last+", p_top=20, p_bottom=20),
        """if (__builtin_expect(will_it_blend(), 0)) {
    // this branch is not likely to be taken
}""")

    slide = new_slide(slides)
    content = slide_header(slide, "Branch target prediction")
    list_wrapper = content.box()
    list_item(list_wrapper).text(
        "Target of a jump is not known at compile time:")
    list_item(list_wrapper, show="next+", level=1).text("Function pointer")
    list_item(list_wrapper, show="next+",
              level=1).text("Function return address")
    list_item(list_wrapper, show="next+", level=1).text("Virtual method")

    if backup:
        slide = new_slide(slides)
        slide.update_style("code", s(size=26))
        content = slide_header(slide, "Code (backup)")
        code(
            content,
            """struct A { virtual void handle(size_t* data) const = 0; };
struct B: public A { void handle(size_t* data) const final { *data += 1; } };
struct C: public A { void handle(size_t* data) const final { *data += 2; } };

std::vector<std::unique_ptr<A>> data = /* 4K random B/C instances */;
// std::sort(data.begin(), data.end(), /* sort by instance type */);
size_t sum = 0;
for (auto& x : data)
{
    x->handle(&sum);
}""")

        slide = new_slide(slides)
        content = slide_header(slide, "Result (backup)")
        content.box(width="90%").image("images/example0b-time.png")

        slide = new_slide(slides)
        content = slide_header(slide, "perf (backup)")
        bash(content.box(),
             """$ perf stat -e branch-misses ./example0b
with sort   ->     337 274
without sort -> 84 183 161""",
             text_style=s(align="left"))
Esempio n. 15
0
def cache_conflicts(slides: SlideDeck, backup: bool):
    """Append the cache-conflict section of the talk to ``slides``.

    The section introduces set-associative caches, shows how an address is
    split into tag / index / offset, animates where cache lines land for
    different associativities and strides, and closes with the counter used
    to measure the effect.

    Args:
        slides: Deck that receives the new slides.
        backup: When true, the extra "(backup)" code/result slides and the
            perf counter output are emitted as well.
    """
    if backup:
        slide = new_slide(slides)
        content = slide_header(slide, "Code (backup)")
        code(
            content.box(),
            """// Addresses of N integers, each `offset` bytes apart
std::vector<int*> data = ...;
for (auto ptr: data)
{
    *ptr += 1;
}
// Offsets: 4, 64, 4000, 4096, 4128""")
        slide = new_slide(slides)
        content = slide_header(slide, "Result (backup)")
        content.box(height=600).image("images/example1-time.png")

    slide = new_slide(slides)
    content = slide_header(slide, "Cache memory")
    content.box(height=600).image("images/haswell-diagram.png")
    content.box(width=230, height=40, x=816, y=530).rect(color=COLOR_BACKEND,
                                                         stroke_width=3)

    slide = new_slide(slides)
    content = slide_header(slide, "How are (L1) caches implemented")
    list_wrapper = content.box()
    list_item(list_wrapper).text("N-way set associative table")
    list_item(list_wrapper, level=1, show="last+").text("Hardware hash table")
    list_item(list_wrapper, show="next+").text("Key = address (8B)")
    list_item(list_wrapper, show="next+").text("Entry = cache line (64B)")

    # Number of cells in every drawn table, and the two cell sizes in use:
    # the smaller one on the overview slides, the larger one on the
    # per-stride slides near the end.
    hash_size = 8
    hash_dimension = 60
    large_dimension = 80

    def table(wrapper: Box,
              size,
              dimension,
              buckets=None,
              bucket_indices=True):
        """Draw a row of ``size`` square cells inside ``wrapper``.

        When ``buckets`` is given, thicker frames group the cells into that
        many equally wide buckets and, if ``bucket_indices`` is true, each
        bucket gets its index printed below it.

        Returns:
            ``(htable, items)`` - the row box and the list of cell boxes.
        """
        htable = wrapper.box(horizontal=True)
        items = [
            htable.box(width=dimension, height=dimension,
                       horizontal=True).rect("black", stroke_width=2)
            for _ in range(size)
        ]

        if buckets:
            bucket_width = int((size / buckets) * dimension)
            for i in range(buckets):
                pos = i * bucket_width
                htable.box(x=pos, y=0, width=bucket_width,
                           height=dimension).rect("black", stroke_width=6)
                if bucket_indices:
                    htable.box(x=pos, y=dimension - 5,
                               width=bucket_width).text(str(i))

        return (htable, items)

    def htable_row(lcol, rcol, text, block_count, height):
        """Add a labelled table row: label in ``lcol``, table in ``rcol``.

        The label appears on the next step and the table on that same step.
        Returns whatever ``table`` returns for the new table.
        """
        padding = 20
        lcol.box(show="next+", p_top=padding, height=height).text(text)
        return table(rcol.box(show="last+", p_top=padding, height=height),
                     hash_size, hash_dimension, block_count)

    slide = new_slide(slides)
    content = slide_header(slide, "N-way set associative cache")

    content.box().text("Size = {} cache lines".format(hash_size),
                       style="notice")
    (htable, hitems) = table(content.box(p_top=20), hash_size, hash_dimension)
    arrow_wrapper = content.box()
    arrow = Arrow(20)
    # Double-headed arrow from the first to the last cell, drawn 20 units
    # above the points taken from the cells.
    arrow_wrapper.box().line([
        hitems[0].p("50%", 0).add(0, -20),
        hitems[-1].p("50%", 0).add(0, -20),
    ],
                             start_arrow=arrow,
                             end_arrow=arrow,
                             stroke_width=5,
                             color=COLOR_NOTE)

    content.box(
        p_top=20,
        show="next+").text("Associativity (N) - # of cache lines per bucket")
    content.box(p_top=10, show="next+").text("# of buckets = Size / N")

    row = content.box(horizontal=True, p_top=20)
    lcol = row.box(y=0)
    rcol = row.box(y=0, p_left=20)

    htable_row(lcol, rcol, "N = 1 (direct mapped)", hash_size, 110)
    htable_row(lcol, rcol, "N = {} (fully associative)".format(hash_size), 1,
               110)
    htable_row(lcol, rcol, "N = 2", hash_size // 2, 110)

    slide = new_slide(slides)
    content = slide_header(slide, "How are addresses hashed?")

    content.box().text("64-bit address:")
    row = content.box(horizontal=True, width=800)
    widths = ["60%", "25%", "15%"]
    address_colors = ["#B22222", "#007944", "#0018AE"]
    labels = ["Tag", "Index", "Offset"]

    for part_width, part_color, part_label in zip(widths, address_colors,
                                                  labels):
        wrapper = row.box(width=part_width).rect(color=part_color,
                                                 stroke_width=4)
        wrapper.box(padding=4).text(part_label)
    labelrow = content.box(horizontal=True, x=220)
    labelrow.box().text("63")
    labelrow.box(p_left=770).text("0")

    list_wrapper = content.box(p_top=20)
    list_item(list_wrapper, show="next+").text("Offset", "bold")
    list_item(list_wrapper, level=1,
              show="last+").text("Selects byte within a cache line")
    list_item(list_wrapper, level=1,
              show="last+").text("log2(cache line size) bits")
    list_item(list_wrapper, show="next+").text("Index", "bold")
    list_item(list_wrapper, level=1,
              show="last+").text("Selects bucket within the cache")
    list_item(list_wrapper, level=1,
              show="last+").text("log2(bucket count) bits")
    list_item(list_wrapper, show="next+").text("Tag", "bold")
    list_item(list_wrapper, level=1, show="last+").text("Used for matching")

    slide = new_slide(slides)
    content = slide_header(slide, "N-way set associative cache")

    queue = content.box(x="55%", y=40, horizontal=True)
    queue.box(p_right=40).text("Cache lines:")
    # Palette of the three example cache lines; A and C share a colour and,
    # below, the same index bit.
    line_colors = ("#F0134D", "#FF6F5E", "#F0134D")
    cacheline_labels = ("A", "B", "C")
    for line_color, line_label in zip(line_colors, cacheline_labels):
        queue.box(width=hash_dimension,
                  height=hash_dimension).rect(color="black",
                                              bg_color=line_color,
                                              stroke_width=5).text(
                                                  line_label,
                                                  style=s(bold=True,
                                                          color="white"))

    index = content.box(x="55%", y=90, horizontal=True)
    index.box(p_right=75).text("Index bits:")
    index_bits = (0, 1, 0)
    for bit in index_bits:
        index.box(width=hash_dimension,
                  height=hash_dimension).text(str(bit))

    def insert(slot, show, item):
        """Cover ``slot`` with example cache line number ``item``."""
        wrapper = slot.overlay(show=show)
        wrapper.rect(bg_color=line_colors[item])
        wrapper.text(cacheline_labels[item], style=s(bold=True, color="white"))

    row = content.box(horizontal=True, p_top=20)
    lcol = row.box(y=0)
    rcol = row.box(y=0, p_left=20)

    (_, hitems) = htable_row(lcol, rcol, "N = 1", hash_size, 140)
    insert(hitems[0], "next+", 0)
    insert(hitems[1], "next+", 1)
    insert(hitems[0], "next+", 2)

    (_, hitems) = htable_row(lcol, rcol, "N = {}".format(hash_size), 1, 140)
    insert(hitems[0], "next+", 0)
    insert(hitems[1], "next+", 1)
    insert(hitems[2], "next+", 2)

    (_, hitems) = htable_row(lcol, rcol, "N = 2", hash_size // 2, 140)
    insert(hitems[0], "next+", 0)
    insert(hitems[2], "next+", 1)
    insert(hitems[1], "next+", 2)

    slide = new_slide(slides)
    slide.update_style("default", s(size=46))
    slide.update_style("bold", s(size=46))
    content = slide_header(slide, "Intel L1 cache")
    bash(content.box(),
         """$ getconf -a | grep LEVEL1_DCACHE
LEVEL1_DCACHE_SIZE      32768
LEVEL1_DCACHE_ASSOC     8
LEVEL1_DCACHE_LINESIZE  64""",
         text_style=s(align="left"))
    list_wrapper = content.box(p_top=20)
    list_item(
        list_wrapper,
        show="next+").text("~bold{Cache line size} - 64 B (6 offset bits)")
    list_item(list_wrapper, show="next+").text("~bold{Associativity} (N) - 8")
    list_item(list_wrapper, show="next+").text("~bold{Size} - 32768 B")
    list_item(list_wrapper, show="next+").text("32768 / 64 => 512 cache lines")
    list_item(list_wrapper,
              show="next+").text("512 / 8 => 64 buckets (6 index bits)")

    # Deck-wide text styles for the three address parts, coloured like the
    # boxes on the "How are addresses hashed?" slide.
    slides.set_style("tag", s(color=address_colors[0]))
    tag = slides.get_style("tag")
    slides.set_style("index", tag.compose(s(color=address_colors[1])))
    slides.set_style("offset", tag.compose(s(color=address_colors[2])))

    styles = ["tag", "index", "offset"]
    # Palette of the four example numbers A-D on the per-stride slides.
    number_colors = ["#F0134D", "#FF6F5E", "#1F6650", "#40BFC1"]

    def address(cols, cells, stepwise=True, use_style=True, row=0):
        """Append one table row, putting ``cells[i]`` into ``cols[i]``.

        Args:
            cols: Column boxes, one per cell.
            cells: Texts of the row (name, tag, index, offset).
            stepwise: When true the row is revealed on the next step;
                otherwise it is visible from step 1.
            use_style: When true the first cell is coloured with
                ``number_colors[row]`` and the remaining cells use the
                "tag" / "index" / "offset" styles; otherwise everything uses
                the "default" style.
            row: Index into ``number_colors`` for the first cell.
        """
        for i, col in enumerate(cols):
            show = "1+"
            if stepwise:
                # Only the first cell advances the step; the others join it.
                show = "next+" if i == 0 else "last+"

            style = "default"
            if use_style:
                if i == 0:
                    style = s(color=number_colors[row])
                else:
                    style = styles[i - 1]
            col.box(show=show).text(cells[i], style=style)

    slide = new_slide(slides)
    content = slide_header(slide, "Offset = 4B")

    width = 700
    columns = 4
    row = content.box(horizontal=True)
    cols = [row.box(width=width // columns) for _ in range(columns)]

    address(cols, ("Number", "Tag", "Index", "Offset"),
            stepwise=False,
            use_style=False)
    address(cols, ("A", "..100000", "000000", "000000"), stepwise=False)
    address(cols, ("B", "..100000", "000000", "000100"), row=1)
    address(cols, ("C", "..100000", "000000", "001000"), row=2)
    address(cols, ("D", "..100000", "000000", "001100"), row=3)

    (htable, hitems) = table(content.box(p_top=20), hash_size, large_dimension,
                             hash_size // 2)
    # All four numbers go into the first cell; no explicit x is given here,
    # unlike on the following slides.
    for i, number_color in enumerate(number_colors):
        hitems[0].box(show="{}+".format(i + 1),
                      width=large_dimension // 4,
                      height=large_dimension).rect(bg_color=number_color)

    list_wrapper = content.box(p_top=40)
    list_item(
        list_wrapper,
        show="next+").text("Same bucket, same cache line for each number")
    list_item(list_wrapper,
              show="next+").text("Most efficient, no space is wasted")

    slide = new_slide(slides)
    content = slide_header(slide, "Offset = 64B")

    row = content.box(horizontal=True)
    cols = [row.box(width=width // columns) for _ in range(columns)]

    address(cols, ("Number", "Tag", "Index", "Offset"),
            stepwise=False,
            use_style=False)
    address(cols, ("A", "..100000", "000000", "000000"), stepwise=False)
    address(cols, ("B", "..100000", "000001", "000000"), row=1)
    address(cols, ("C", "..100000", "000010", "000000"), row=2)
    address(cols, ("D", "..100000", "000011", "000000"), row=3)

    (htable, hitems) = table(content.box(p_top=20), hash_size, large_dimension,
                             hash_size // 2)
    # Every number goes to the first cell of its own two-cell bucket.
    for i, number_color in enumerate(number_colors):
        hitems[i * 2].box(show="{}+".format(i + 1),
                          width=large_dimension // 4,
                          height=large_dimension,
                          x=0).rect(bg_color=number_color)

    list_wrapper = content.box(p_top=40)
    list_item(list_wrapper,
              show="next+").text("Different bucket for each number")
    list_item(list_wrapper, show="next+").text("Wastes 60B in each cache line")
    list_item(list_wrapper,
              show="next+").text("Equally distributed among buckets")

    slide = new_slide(slides)
    content = slide_header(slide, "Offset = 4096B")

    row = content.box(horizontal=True)
    cols = [row.box(width=width // columns) for _ in range(columns)]

    address(cols, ("Number", "Tag", "Index", "Offset"),
            stepwise=False,
            use_style=False)
    address(cols, ("A", "..100000", "000000", "000000"), stepwise=False)
    address(cols, ("B", "..100001", "000000", "000000"), row=1)
    address(cols, ("C", "..100010", "000000", "000000"), row=2)
    address(cols, ("D", "..100011", "000000", "000000"), row=3)

    (htable, hitems) = table(content.box(p_top=20), hash_size, large_dimension,
                             hash_size // 2)
    # The numbers alternate between the two cells of the first bucket, so
    # later numbers are drawn over earlier ones.
    for i, number_color in enumerate(number_colors):
        hitems[i % 2].box(show="{}+".format(i + 1),
                          width=large_dimension // 4,
                          height=large_dimension,
                          x=0).rect(bg_color=number_color)

    list_wrapper = content.box(p_top=40)
    list_item(list_wrapper, show="next+").text(
        "Same bucket, but different cache lines for each number!")
    list_item(list_wrapper,
              show="next+").text("Bucket full => evictions necessary")

    slide = new_slide(slides)
    content = slide_header(slide, "How to measure?")
    content.box().text("~tt{l1d.replacement}", style=s(size=48))
    content.box(
        p_top=20).text("How many times was a cache line loaded into L1?")

    if backup:
        bash(content.box(p_top=40, show="next+"),
             """$ perf stat -e l1d.replacement ./example1
4B    offset ->     149 558
4096B offset -> 426 218 383""",
             text_style=s(align="left"))
Esempio n. 16
0
def denormals(slides: SlideDeck, backup: bool):
    """Append the denormal-floating-point section of the talk to ``slides``.

    Args:
        slides: Deck that receives the new slides.
        backup: When true, the "(backup)" code/result slides and the perf
            counter output are emitted as well.
    """
    if backup:
        listing_slide = new_slide(slides)
        body = slide_header(listing_slide, "Code (backup)")
        code(body.box(), """float F = static_cast<float>(std::stof(argv[1]));
std::vector<float> data(4 * 1024 * 1024, 1);

for (int r = 0; r < 100; r++)
{
    for (auto& item: data)
    {
        item *= F;
    }
}""")
        result_slide = new_slide(slides)
        body = slide_header(result_slide, "Result (backup)")
        body.box(width="50%").image("images/example2-time.png")

    # --- Bit layout of a denormal number ------------------------------------
    layout_slide = new_slide(slides)
    palette = ["#B22222", "#007944", "#0018AE"]
    part_styles = ["sign", "exponent", "significand"]
    # One text style per part of the number, registered on this slide only.
    for style_name, part_color in zip(part_styles, palette):
        layout_slide.set_style(style_name, s(color=part_color, size=42))
    body = slide_header(layout_slide, "Denormal floating point numbers")

    bit_row = body.box(horizontal=True)
    cell_size = 60

    def floating_point(wrapper: Box, colors, values):
        """Draw one square cell per colour, labelled with the matching value."""
        for position, fill in enumerate(colors):
            cell = wrapper.box(width=cell_size, height=cell_size)
            cell.rect(color="black", bg_color=fill, stroke_width=2)
            cell.text(str(values[position]), s(color="white", bold=True))

    sign_color, exponent_color, significand_color = palette
    # 1 sign cell, 5 exponent cells, 10 significand cells; only the last
    # significand cell holds a 1.
    bit_colors = ([sign_color] + [exponent_color] * 5 +
                  [significand_color] * 10)
    bit_values = [0] + [0] * 5 + [0] * 9 + [1]
    floating_point(bit_row, bit_colors, bit_values)

    captions = body.box(x=0, horizontal=True, show="next+")
    captions.box(x=250, y=0).text("Zero exponent")
    captions.box(x=670, y=0).text("Non-zero significand")

    body.box(p_top=80, width=700, height=50).image("images/float.svg")

    bullets = body.box(p_top=20)
    list_item(bullets, show="next+").text("Numbers close to zero")
    list_item(bullets, show="last+").text("Hidden bit = 0, smaller bias")

    slow_note = body.box(p_top=40, show="next+")
    slow_note.text("Operations on denormal numbers are slow!",
                   style=s(size=46))

    # --- Highlighted areas of the CPU diagram -------------------------------
    diagram_slide = new_slide(slides)
    body = slide_header(diagram_slide, "Floating point handling")
    body.box(height=600).image("images/haswell-diagram.png")
    body.box(width=130, height=32, x=800,
             y=52).rect(color=COLOR_FRONTEND, stroke_width=3)
    body.box(width=35, height=100, x=988,
             y=80).rect(color=COLOR_FRONTEND, stroke_width=3)
    body.box(width=80, height=165, x=212,
             y=335).rect(color=COLOR_BACKEND, stroke_width=3)

    # --- Measuring ----------------------------------------------------------
    measure_slide = new_slide(slides)
    body = slide_header(measure_slide, "How to measure?")
    body.box().text("~tt{fp_assist.any}", style=s(size=48))
    body.box(p_top=20).text(
        "How many times the CPU switched to the microcode FP handler?")

    if backup:
        bash(body.box(p_top=40, show="next+"),
             """$ perf stat -e fp_assist.any ./example2
0   ->          0
0.3 -> 15 728 640""",
             text_style=s(align="left"))

    # --- Fixes --------------------------------------------------------------
    fix_slide = new_slide(slides)
    fix_slide.update_style("code", s(size=40))
    body = slide_header(fix_slide, "How to fix it?")
    bullets = body.box()
    list_item(bullets).text("The nuclear option: ~tt{-ffast-math}")
    list_item(bullets, level=1).text(
        "Sacrifice correctness to gain more FP performance")
    list_item(bullets, show="next+").text("Set CPU flags:")
    list_item(bullets, level=1, show="last+").text(
        "Flush-to-zero - treat denormal outputs as 0")
    list_item(bullets, level=1, show="last+").text(
        "Denormals-to-zero - treat denormal inputs as 0")

    code(bullets.box(p_top=40, show="next+"),
         """_mm_setcsr(_mm_getcsr() | 0x8040);
// or
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
""")
Esempio n. 17
0
def intro(slides: SlideDeck):
    slide = new_slide(slides)
    slide.box().text(
        """CPU design effects
that can degrade performance of your programs""", s(bold=True, size=40))

    slide.box(p_top=40).text("""Jakub Beránek
[email protected]""", s(bold=False, size=30))

    slide = new_slide(slides)
    content = slide_header(slide, "~tt{whoami}")
    list_wrapper = content.box()
    list_item(list_wrapper).text(
        "PhD student @ VSB-TUO, Ostrava, Czech Republic")
    list_item(list_wrapper).text(
        "Research assistant @ IT4Innovations (HPC center)")
    list_item(list_wrapper).text(
        "HPC, distributed systems, program optimization")

    slide = new_slide(slides)
    content = slide_header(slide, "How do we get maximum performance?")
    list_wrapper = content.box()
    list_item(list_wrapper).text("Select the right algorithm")
    list_item(list_wrapper, show="next+").text("Use a low-overhead language")
    list_item(list_wrapper, show="next+").text("Compile properly")
    list_item(list_wrapper,
              show="next+").text("~bold{Tune to the underlying hardware}")