Example #1
0
    sizes = [10]*len(x)
    circles = [1]*len(x)
    crosses = [2]*len(x)
    triangles = [3]*len(x)
    exes = [4]*len(x)
    asterisks = [5]*len(x)
    diamonds = [6]*len(x)
    squares = [7]*len(x)

    scatter_data = ColumnDataSource(dict(x=x, circles=circles, crosses=crosses, triangles=triangles, exes=exes, asterisks=asterisks, diamonds=diamonds, squares=squares, sizes=sizes))

    glyphs = []
    glyphs.append(Circle(x="x", y="circles", size="sizes", fill_color="red", name="the_circles"))
    glyphs.append(Cross(x="x", y="crosses", size="sizes", fill_color="blue", name="the_crosses"))
    glyphs.append(Triangle(x="x", y="triangles", size="sizes", fill_color="green", name="the_triangles"))
    glyphs.append(X(x="x", y="exes", size="sizes", fill_color="purple", name="the_xs"))
    glyphs.append(Asterisk(x="x", y="asterisks", size="sizes", fill_color="orange", name="the_asterisks"))
    glyphs.append(Diamond(x="x", y="diamonds", size="sizes", fill_color="yellow", name="the_diamonds"))
    glyphs.append(Square(x="x", y="squares", size="sizes", fill_color="gray", name="the_squares"))

    legend_items = []
    for glyph in glyphs:
        renderer = p.add_glyph(scatter_data, glyph)
        renderer.name = glyph.name
        legend_items.append((renderer.name, [renderer]))
    
    legend = Legend(
        items=legend_items,
        name="the_legend",
        label_text_font_size='5pt',
        glyph_width=20,
Example #2
0
# Ranges handed to the plot for its two dimensions.
xdr = DataRange1d()
ydr = DataRange1d()

# Borderless, toolbar-free 300x300 canvas without a title.
plot_options = dict(
    title=None,
    x_range=xdr,
    y_range=ydr,
    plot_width=300,
    plot_height=300,
    h_symmetry=False,
    v_symmetry=False,
    min_border=0,
    toolbar_location=None,
)
plot = Plot(**plot_options)

# "X" markers read their position and size from the "x", "y" and "sizes"
# columns of `source`; they are outlined only (no fill).
glyph = X(x="x", y="y", size="sizes", line_color="#fdae6b", line_width=2, fill_color=None)
plot.add_glyph(source, glyph)

# One linear axis per dimension: x below the plot, y to its left.
xaxis = LinearAxis()
yaxis = LinearAxis()
plot.add_layout(xaxis, 'below')
plot.add_layout(yaxis, 'left')

# Each grid reuses the ticker of the axis for the same dimension.
for dimension, axis in enumerate((xaxis, yaxis)):
    plot.add_layout(Grid(dimension=dimension, ticker=axis.ticker))

# Register the finished plot with a fresh document.
doc = Document()
doc.add(plot)
Example #3
0
def _insert_missing_column(table, loc, name, values):
    """Insert *values* into *table* as column *name* at position *loc* unless it already exists."""
    if name not in table.columns:
        table.insert(loc, name, values)


def _control_limit_bounds(values, qc, sam, reported_rsd_qc, control_limit):
    """Return the ``(low, upp)`` control-limit pair around the QC mean of *values*.

    ``(None, None)`` is returned when *control_limit* is falsy (limits disabled).

    Raises
    ------
    ValueError
        If *control_limit* is neither falsy, ``('RSD', value)`` nor
        ``('D-ratio', value)``.
    """
    if not control_limit:
        return None, None

    qc_values = values[qc == 1]
    qc_mean = np.nanmean(qc_values)
    if control_limit[0] == 'D-ratio':
        # NOTE(review): the spread is taken over rows with sam == 0 (i.e. the
        # non-sample rows), exactly as before; sam == 1 looks more plausible
        # for a D-ratio -- confirm before changing.
        std_sam = np.nanstd(values[sam == 0], ddof=1)
        half_width_std = control_limit[1] * std_sam / 100
    elif control_limit[0] == 'RSD':
        qc_rsd = np.nanstd(qc_values, ddof=1) / qc_mean * 100
        # Temporary (deal with log): rescale by the ratio between the RSD of
        # the (possibly transformed) values and the RSD reported by
        # calc_rsd_dratio.
        half_width_std = control_limit[1] * qc_mean / 100 * (qc_rsd / reported_rsd_qc)
    else:
        raise ValueError("Control limit must be either False, ('RSD', value), or ('D-ratio', value)")
    return qc_mean - 2 * half_width_std, qc_mean + 2 * half_width_std


def control_chart_multibatch(DataTable, PeakTable, batch, peak, gamma='default', transform='log', parametric=True, zeroflag=True, plot=['Sample', 'QC'], control_limit=False, colormap='Set2'):
    """Run QCRSC on one peak for several batches and show the pre-correction control chart.

    Each requested batch is corrected independently with ``QCRSC``; the
    corrected values are re-centred on the median of the per-batch ``mpa``
    values returned by ``QCRSC``. Summary statistics are computed and a Bokeh
    figure (a text panel plus the "before correction" scatter) is displayed in
    the notebook.

    Parameters
    ----------
    DataTable : pandas.DataFrame
        Must provide ``SampleType``, ``Batch``, ``Order`` and a column named
        after the peak. ``QC``, ``Sample`` and ``Blank`` indicator columns are
        inserted **in place** when they are missing.
    PeakTable : pandas.DataFrame
        Must provide ``Name`` and ``Label`` columns.
    batch : sequence
        Batch identifiers (values of ``DataTable.Batch``) to process, in order.
    peak : str
        Peak name, or ``'R'`` to pick a random peak from ``PeakTable.Name``.
    gamma : 'default', False or (start, stop, step)
        Search grid handed to ``QCRSC``. ``'default'`` and ``False`` both use
        ``(0.5, 5, 0.2)``; with ``False`` the QC mean is drawn instead of the
        fitted curve.
    transform : str
        ``'log'`` applies ``log10`` to the peak values; anything else leaves
        them untouched.
    parametric : bool
        Mean (truthy) or median (falsy) for the blank ratio; also forwarded to
        ``calc_rsd_dratio``.
    zeroflag : bool
        When truthy, zeros are replaced by NaN before transforming.
    plot : list of str
        ``SampleType`` values to draw. The default list is never mutated.
    control_limit : False or (str, number)
        ``False`` (no limits), ``('RSD', value)`` or ``('D-ratio', value)``.
    colormap : str
        Matplotlib colormap name; 'Sample' points are coloured by looking up
        their batch value in it.

    Raises
    ------
    ValueError
        If ``batch`` is empty, a batch or the peak does not exist, or
        ``control_limit`` has an unsupported form.
    """
    table_check(DataTable, print_statement=False)
    peak_list = PeakTable.Name
    # '==' rather than 'is': identity comparison against a string literal is
    # only true by accident of interning.
    log_transform = transform == 'log'
    # With gamma=False the correction still runs (on the default grid) but the
    # plot shows the QC mean instead of the fitted curve.
    fit_requested = gamma != False  # noqa: E712 -- gamma may be a tuple

    # Indicator columns derived from SampleType (only added if they don't exist)
    sample_dummies = pd.get_dummies(DataTable.SampleType)
    _insert_missing_column(DataTable, 3, 'QC', sample_dummies.QC)
    _insert_missing_column(DataTable, 3, 'Sample', sample_dummies.Sample)
    if 'Blank' in sample_dummies.columns:
        blank_col = sample_dummies.Blank
    else:
        blank_col = [0] * len(DataTable)  # No blanks
    _insert_missing_column(DataTable, 4, 'Blank', blank_col)

    # Default gamma_range (Temporary add False)
    gamma_input = (0.5, 5, 0.2) if gamma in ('default', False) else gamma
    # round() before int(): e.g. 0.57 * 100 == 56.99999999999999 would
    # otherwise be truncated to 56.
    gamma_steps = range(int(round(gamma_input[0] * 100)),
                        int(round(gamma_input[1] * 100)),
                        int(round(gamma_input[2] * 100)))
    gamma_range = [step / 100.0 for step in gamma_steps]

    # Randomly select a peak if "peak = R"
    if peak == 'R':
        pp = randint(0, len(peak_list) - 1)  # Needs to be -1
        peak = peak_list.iloc[pp]  # positional pick, independent of the index labels

    peak_matches = peak_list[peak_list == peak]
    if len(peak_matches) == 0:
        raise ValueError("Fatal Error: peak {} does not exist.".format(peak))
    index = peak_matches.index[0]
    peak_name = PeakTable.Name[index]
    peak_label = PeakTable.Label[index]

    # Validate every requested batch before doing any work
    if len(batch) == 0:
        raise ValueError("Fatal Error: at least one batch must be given")
    known_batches = np.unique(DataTable.Batch)
    for batch_i in batch:
        if batch_i not in known_batches:
            raise ValueError("Fatal Error: batch {} does not exist".format(batch_i))

    # Correct each batch on its own, collecting the pieces for one final concat
    x_parts, t_parts, qc_parts, sam_parts, b_parts = [], [], [], [], []
    table_parts, type_parts = [], []
    f_parts, z_parts = [], []
    gamma_final, curvetype, mpa_per_batch = [], [], []
    for batch_i in batch:
        in_batch = np.asarray(DataTable.Batch == batch_i)

        # Extract and transform data
        x_i = DataTable[peak][in_batch]
        t_i = DataTable.Order[in_batch]
        qc_i = DataTable.QC[in_batch]
        if zeroflag:
            x_i = x_i.replace(0, np.nan)
        if log_transform:
            x_i = np.log10(x_i)

        # perform the QCRSC
        z_i, f_i, curvetype_i, _cv_mse, gamma_final_i, mpa_i = QCRSC(x_i, t_i, qc_i, gamma_range)

        x_parts.append(x_i)
        t_parts.append(t_i)
        qc_parts.append(qc_i)
        sam_parts.append(DataTable.Sample[in_batch])
        b_parts.append(DataTable.Batch[in_batch])
        table_parts.append(DataTable[in_batch])
        type_parts.append(DataTable.SampleType[in_batch])
        f_parts.append(np.asarray(f_i))
        z_parts.append(z_i.values)
        gamma_final.append(gamma_final_i)
        curvetype.append(curvetype_i)
        mpa_per_batch.append(mpa_i)

    # pd.concat instead of Series/DataFrame.append (removed in pandas 2.0)
    x = pd.concat(x_parts)
    t = pd.concat(t_parts)
    qc = pd.concat(qc_parts)
    sam = pd.concat(sam_parts)
    b = pd.concat(b_parts).values
    BatchTable = pd.concat(table_parts)
    sampletype = pd.concat(type_parts)
    f = np.concatenate(f_parts)
    gamma_final = np.array(gamma_final)
    curvetype = np.array(curvetype)

    # Align: remove each batch's own mpa, then add back the median mpa
    aligned = [z_b - mpa_b for z_b, mpa_b in zip(z_parts, mpa_per_batch)]
    mpa = np.nanmedian(mpa_per_batch, axis=0)
    z = pd.Series(np.concatenate(aligned), index=x.index) + np.array(mpa)

    # Calc RSD and D-ratio
    before_rsd_qc, before_rsd_sam, before_dratio = calc_rsd_dratio(x, qc, sam, transform, parametric)
    # The after-correction statistics below are computed as in the original
    # code but are not drawn by this function.
    after_rsd_qc, _after_rsd_sam, _after_dratio = calc_rsd_dratio(z, qc, sam, transform, parametric)

    # Calc Blank peak area ratio (Before & After)
    blank_bpar = DataTable[DataTable.Blank == 1][peak]
    if len(blank_bpar) != 0:
        before_qc_bpar = x[qc == 1]
        after_qc_bpar = z[qc == 1]
        if log_transform:
            # Back to the raw scale so the QCs are comparable with the raw blanks
            before_qc_bpar = np.power(10, before_qc_bpar)
            after_qc_bpar = np.power(10, after_qc_bpar)
        centre = np.nanmean if parametric else np.nanmedian
        bpar_blank = centre(blank_bpar)
        before_bpar = bpar_blank / centre(before_qc_bpar) * 100
        _after_bpar = bpar_blank / centre(after_qc_bpar) * 100
    else:
        before_bpar = np.nan
        _after_bpar = np.nan

    # Control limits (before / after correction); computed on all batch rows,
    # i.e. before the plot filter is applied
    x_qc_mean = np.nanmean(x[qc == 1])
    before_limit_low, before_limit_upp = _control_limit_bounds(x, qc, sam, before_rsd_qc, control_limit)
    _after_limit_low, _after_limit_upp = _control_limit_bounds(z, qc, sam, after_rsd_qc, control_limit)

    ##################################################################################
    #### Plot using BOKEH ####

    output_notebook()

    # Select what to plot. Every per-row array is filtered with the same
    # positional mask so that all plotted columns keep the same length
    # (previously `f` and `b` were left unfiltered).
    shown = BatchTable['SampleType'].isin(plot).values
    x = x[shown]
    t = t[shown]
    z = z[shown]
    f = f[shown]
    b = b[shown]
    sampletype = sampletype[shown]
    order = BatchTable.Order[shown]

    # Set y_label
    y_label = 'log(Peak Area)' if log_transform else 'Peak Area'

    # Get colors: fixed colours for Blank/QC, batch-dependent colour for Sample
    colmap = plt.get_cmap(colormap)
    col = []
    for sample_kind, batch_value in zip(sampletype.values, b):
        if sample_kind == 'Blank':
            col.append('#00FF00')
        elif sample_kind == 'Sample':
            col.append(matplotlib.colors.rgb2hex(colmap([batch_value])[0]))
        elif sample_kind == 'QC':
            col.append('#FF0000')
        else:
            # Any other plotted type still needs an entry, otherwise the
            # colour column is shorter than the data columns.
            col.append('#808080')

    # Before correction plot
    before_fig = figure(title="Batch {} {}:{}".format(batch, peak_name, peak_label), plot_width=600, plot_height=260, x_axis_label='Order', y_axis_label=y_label)
    before_fig.title.text_font_size = '14pt'

    # Before: Plot line ('X' markers for the fit, dashed QC mean otherwise)
    if fit_requested:
        source_before_line = ColumnDataSource(dict(x=t.values, y=f))
        before_fig.add_glyph(source_before_line, X(x="x", y="y", line_width=2, fill_color=None))
    else:
        source_before_line = ColumnDataSource(dict(x=t.values, y=np.ones(len(t)) * x_qc_mean))
        before_fig.add_glyph(source_before_line, Line(x="x", y="y", line_width=2, line_dash="dashed"))

    # Before: Plot circles
    source_before_circle = ColumnDataSource(dict(x=t.values, y=x.values, label=sampletype, color=col, Name=order))
    glyph_before_circle = before_fig.circle(x="x", y="y", fill_color="color", fill_alpha=1, size=8, source=source_before_circle)

    # Before: Add HoverTool
    before_fig.add_tools(HoverTool(
        renderers=[glyph_before_circle],
        tooltips=[
            ("Type", "@label"),
            ("Order", "@Name"), ],))

    # Before: Add control limit (can't draw a line if it doesn't exist)
    if before_limit_low is not None and not np.isnan(before_limit_low):
        source_before_control_limit = ColumnDataSource(dict(
            x=t.values,
            low=[before_limit_low] * len(t),
            upp=[before_limit_upp] * len(t)))
        for column in ("low", "upp"):
            limit_line = Line(x="x", y=column, line_width=2, line_dash="dashed", line_color='black')
            before_fig.add_glyph(source_before_control_limit, limit_line)

    if not fit_requested:
        curvetype = 'nan'
        gamma_final = 'nan'

    # Textbox
    text_fig = figure(title="", plot_width=300, plot_height=265, x_axis_label="", y_axis_label="", outline_line_alpha=0)

    mpa_caption = 'log(MPA): {}' if log_transform else 'MPA: {}'
    text_lines = [
        'Batch: {}'.format(batch),
        'Name: {}'.format(peak_name),
        'Label: {}'.format(peak_label),
        mpa_caption.format(np.round(mpa, 2)),
        'Correction method: {}'.format(curvetype),
        # Show the gamma selected by QCRSC, not the search grid that was passed in.
        'Optimal γ: {}'.format(gamma_final),
        'QC %RSD: {}'.format(np.round(before_rsd_qc, 2)),
        'Sam %RSD: {}'.format(np.round(before_rsd_sam, 2)),
        'D-Ratio: {}'.format(np.round(before_dratio, 2)),
        'Blank-Ratio: {}'.format(np.round(before_bpar, 2)),
    ]
    # Lines are stacked top-down, 20 screen units apart, starting at y=210
    for row, text in enumerate(text_lines):
        text_fig.add_layout(Label(x=72, y=210 - 20 * row, x_units='screen', y_units='screen', text=text, text_font_size='7.5pt'))

    text_fig.circle(0, 0, line_color='white', fill_color='white', fill_alpha=0)  # Necessary to remove warning
    text_fig.xaxis.visible = False
    text_fig.yaxis.visible = False
    text_fig.ygrid.visible = False
    text_fig.xgrid.visible = False

    before_fig.xgrid.visible = False
    before_fig.ygrid.visible = False

    # Show figure (2x2 layout; the second row is left empty)
    fig = gridplot([[text_fig, before_fig], [None, None]])
    show(fig)