예제 #1
0
    def prep_palette(self, pname, binverse=False):
        """
        Build the colour list for a named palette.

        :param pname: palette name such as 'Viridis256' or 'Spectral11';
            a name that matches none of the known entries yields the
            256-step grey palette
        :param binverse: when truthy, the palette is returned reversed
        :return: the selected palette, reversed if ``binverse`` is set
        """
        # Names backed by a generator function of ``palettes`` called with 256.
        generated = (
            ('Greys256', 'grey'),
            ('Inferno256', 'inferno'),
            ('Magma256', 'magma'),
            ('Plasma256', 'plasma'),
            ('Viridis256', 'viridis'),
            ('Cividis256', 'cividis'),
            ('Turbo256', 'turbo'),
        )
        # Names backed by a fixed-size entry of ``palettes.small_palettes``.
        fixed = (
            ('Bokeh8', 'Bokeh', 8),
            ('Spectral11', 'Spectral', 11),
            ('RdGy11', 'RdGy', 11),
            ('PiYG11', 'PiYG', 11),
        )

        # Fallback used when no entry matches.
        chosen = palettes.grey(256)

        for label, func_name in generated:
            if pname == label:
                chosen = getattr(palettes, func_name)(256)
                break
        else:
            for label, family, size in fixed:
                if pname == label:
                    chosen = palettes.small_palettes[family][size]
                    break

        return chosen[::-1] if binverse else chosen
예제 #2
0
def test_cmap_generator_function():
    """Each palette generator called with 256 must equal its fixed 256-entry palette."""
    cases = (
        (pal.viridis, pal.Viridis256),
        (pal.magma, pal.Magma256),
        (pal.plasma, pal.Plasma256),
        (pal.inferno, pal.Inferno256),
        # both spellings of grey are compared against the same palette
        (pal.gray, pal.Greys256),
        (pal.grey, pal.Greys256),
        (pal.turbo, pal.Turbo256),
    )
    for generator, expected in cases:
        assert generator(256) == expected
예제 #3
0
def test_cmap_generator_function():
    """Check the 256-step generators and one ``diverging_palette`` combination."""
    # generator name on ``pal`` -> fixed palette it must reproduce for n=256
    fixed_by_generator = {
        'viridis': pal.Viridis256,
        'magma': pal.Magma256,
        'plasma': pal.Plasma256,
        'inferno': pal.Inferno256,
        'gray': pal.Greys256,
        'grey': pal.Greys256,
        'turbo': pal.Turbo256,
    }
    for generator_name, fixed in fixed_by_generator.items():
        assert getattr(pal, generator_name)(256) == fixed

    # Two 9-colour palettes joined at the midpoint: the first as-is, the
    # second reversed.
    combined = pal.diverging_palette(pal.Reds9, pal.Greys9, n=18, midpoint=0.5)
    assert combined == pal.Reds9 + pal.Greys9[::-1]
예제 #4
0
def prepare_colors(labels, colors):
    """
    Resolve the label set and the colours used to draw each label.

    :param list labels: Data corresponding label list
        If None, it considers all data belong to same category (label 0)
        numpy.ndarray is available
        It assumes that labels begin 0, and the number of labels is labels.max() + 1
    :param list colors: Colors to use, indexed by label
        If None, it automatically set colors
        If a single string, that color is repeated for every label
        numpy.ndarray is available

    :returns: n_labels, unique_labels, colors
        - n_labels : Number of labels. It assumes that label index begins from 0
        - unique_labels : Array of unique labels
        - colors : Color codes. A supplied sequence is returned unchanged;
          otherwise the length is n_labels

    :raises ValueError: if the supplied colors are fewer than n_labels, or
        if colors must be generated and there are more than 256 labels

    Usage

        >>> labels = [0, 0, 1, 1, 2, 5]
        >>> colors = None
        >>> n_labels, unique_labels, colors = prepare_colors(labels, colors)
        >>> print(n_labels)
        6
    """
    # find unique labels and derive the number of labels
    if labels is not None:
        unique_labels = np.unique(labels)
    else:
        # The ``np.int`` alias was removed in NumPy 1.24; the builtin ``int``
        # selects the same default integer dtype on every NumPy version.
        unique_labels = np.zeros(1, dtype=int)
    n_labels = unique_labels.max() + 1

    # check inserted colors
    if colors is not None:
        if isinstance(colors, str):
            # one color name applies to every label
            colors = [colors] * n_labels
        if len(colors) < n_labels:
            raise ValueError(f'There exists {n_labels}.'\
                             ' However, the length of colors is too short')
        return n_labels, unique_labels, colors

    # prepare colors: Set1 for up to 9 labels, turbo for up to 256
    if n_labels <= 9:
        colors = Set1[9][:n_labels]
    elif n_labels > 256:
        raise ValueError(f'There exists {n_labels}, too many labels')
    else:
        colors = turbo(n_labels)

    return n_labels, unique_labels, colors
예제 #5
0
파일: main.py 프로젝트: spmcelrath/demos
# Full-height marker line located at the date selected by ``itr``.
iqr_d = Span(
    location=pd.to_datetime(dates[itr]),
    dimension='height',
    line_color='grey',
    line_width=2,
    line_alpha=0.3,
)

# One turbo colour per distinct category; 'black' is passed as nan_color.
pal_len = len(pd.Series(init_categories).unique())
new_pal = turbo(pal_len)
init_colors = factor_cmap(
    'category',
    palette=new_pal,
    factors=pd.Series(init_categories).unique(),
    nan_color='black',
)

# Column data backing the glyphs, one entry per node attribute.
ds = ColumnDataSource({
    'x': init_x,
    'y': init_y,
    'name': init_names,
    'full_name': init_full_names,
    'sector': init_sectors,
    'category': init_categories,
    'size': init_sizes,
    'label_offset': init_offsets,
})
mt.cols().show()
mt.rows().show()

# Sample QC: histograms of per-sample call rate and mean genotype quality
mt_qc = hl.sample_qc(mt)
p = hl.plot.histogram(mt_qc.sample_qc.call_rate,
                      range=(0.88, 1),
                      legend='Call Rate')
p_2 = hl.plot.histogram(mt_qc.sample_qc.gq_stats.mean, legend='Mean Sample GQ')

# PCA
columns = mt.cols()
pca_scores = columns.population_inference.pca_scores
labels = columns.population_inference.pop
pops = list(set(labels.collect()))
# Size the palette from the populations actually present, so every factor
# gets its own colour instead of relying on there being exactly 8.
mapper = CategoricalColorMapper(palette=turbo(len(pops)), factors=pops)

# plot PC1 against PC2, coloured by inferred population
p = hl.plot.scatter(
    pca_scores[0],
    pca_scores[1],
    label=labels,
    title='PCA',
    xlabel='PC1',
    ylabel='PC2',
    collect_all=True,
    colors=mapper,
)

p = hl.plot.scatter(
    pca_scores[1],
예제 #7
0
def scheduling_plot(result):
    """
    Draw a Gantt chart from a scheduling result file and save it as
    ``gantt_chart.png``.

    The first line of the file carries the number of processes as its first
    token. Each following non-blank line is split on whitespace and its
    first three tokens are read as integers: process number, start time and
    end time. The end time of the last such line is used as the end of the
    time axis.

    :param result: path of the scheduling result text file
    :raises IndexError: if the file has no record lines, or a record has
        fewer than three tokens
    :raises ValueError: if a required token is not an integer
    """
    # Read the scheduling result text file and strip surrounding whitespace
    with open(result, 'rt') as file:
        lines = [line.strip() for line in file]

    # The first line stores the number of processes
    num = int(lines[0].split()[0])

    # Every remaining non-blank line is one executed slice; blank lines are
    # skipped so a stray empty line does not break parsing
    records = [line.split() for line in lines[1:] if line]

    # The end time of the last record is the overall end time
    endTime = int(records[-1][2])

    # Create the plot; its size follows the end time and the process count
    fig, gantt = plt.subplots(figsize=(endTime * 0.5, num * 1.5))

    gantt.set_xlim(0, endTime + 1)
    gantt.set_ylim(0, ((PROC_HEIGHT * num) + (PROC_SPACING * (num + 1))))

    gantt.set_xlabel("Time")
    gantt.set_ylabel("Process")

    # One y tick per process, positioned by bar_mid and labelled 1..num
    gantt.set_yticks([bar_mid(i + 1) for i in range(num)])
    gantt.set_yticklabels(list(range(1, num + 1)))

    # One x tick per time unit from 0 to endTime
    time_ticks = list(range(endTime + 1))
    gantt.set_xticks(time_ticks)
    gantt.set_xticklabels(time_ticks)

    gantt.grid(True)

    # Build the palette once; it does not change between bars
    palette = turbo(num)

    # Draw one bar per executed slice
    for record in records:
        proc = int(record[0])
        begin = int(record[1])
        finish = int(record[2])
        gantt.broken_barh([(begin, finish - begin)],
                          (bar_bottom(proc), PROC_HEIGHT),
                          color=palette[proc - 1])

    # Reduce the surrounding whitespace
    plt.tight_layout()

    # plt.show() must stay disabled when the result is saved to a file
    # plt.show()

    # Save the result as a png file
    plt.savefig("gantt_chart.png")
def generate_correlation_graph(correlation_matrix_csv_path, path_to_save, title='Correlation Matrix',plot_height=1000, plot_width=1600):
    """
    Render a correlation-matrix CSV as a Bokeh heatmap and save it to HTML.

    The CSV is read with pandas, its ``'Unnamed: 0'`` column becomes the row
    index, and the matrix is stacked into one row per (level_0, parameters)
    pair with a ``correlation`` value. A ``Select`` widget switches the
    colour palette through a JavaScript callback.

    :param correlation_matrix_csv_path: path of the CSV holding the matrix
    :param path_to_save: path of the HTML file to write
    :param title: plot title
    :param plot_height: height passed to ``figure``
    :param plot_width: width passed to ``figure``
    """
    ## PREPARING CORRELATION MATRIX
    df = pd.read_csv(correlation_matrix_csv_path)
    df = df.set_index('Unnamed: 0').rename_axis('parameters', axis=1)
    df.index.name = 'level_0'

    ## AXIS LABELS FOR PLOT
    common_axes_val = list(df.index)
    df = pd.DataFrame(df.stack(), columns=['correlation']).reset_index()
    source = ColumnDataSource(df)

    ## FINDING LOWEST AND HIGHEST OF CORRELATION VALUES
    corr_min = df.correlation.min()
    corr_max = df.correlation.max()
    # one palette entry per distinct correlation value
    no_of_colors = len(df.correlation.unique())

    ### PLOT PARTICULARS
    ## CHOOSING DEFAULT COLORS
    # Key of COLOR_SCHEME shown initially; it must match an option of the
    # Select widget exactly (the keys are capitalised).
    default_scheme = 'Cividis'
    mapper = LinearColorMapper(
        palette=get_reversed_list(cividis(no_of_colors)),
        low=corr_min,
        high=corr_max,
    )

    ## SETTING UP THE PLOT
    p = figure(
        title=title,
        x_range=common_axes_val,
        y_range=list(common_axes_val),
        x_axis_location="below",
        plot_width=plot_width,
        plot_height=plot_height,
        tools=BOKEH_TOOLS,
        toolbar_location='above',
        tooltips=[('Parameters', '@level_0 - @parameters'), ('Correlation', '@correlation')],
    )
    p.toolbar.autohide = True

    ## SETTING UP PLOT PROPERTIES
    p.grid.grid_line_color = None
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.axis.major_label_text_font_size = "12pt"
    p.xaxis.major_label_orientation = pi/2

    ## SETTING UP HEATMAP RECTANGLES
    cir = p.rect(
        x="level_0",
        y="parameters",
        width=1,
        height=1,
        source=source,
        fill_color={'field': 'correlation', 'transform': mapper},
        line_color=None,
    )

    ## SETTING UP COLOR BAR
    color_bar = ColorBar(
        color_mapper=mapper,
        major_label_text_font_size="5pt",
        ticker=BasicTicker(desired_num_ticks=10),
        formatter=PrintfTickFormatter(format="%.1f"),
        label_standoff=6,
        border_line_color=None,
        location=(0, 0),
    )
    p.add_layout(color_bar, 'right')

    ## AVAILABLE COLOR SCHEMES
    COLOR_SCHEME = {
        'Cividis':get_reversed_list(cividis(no_of_colors)),
        'Gray':get_reversed_list(gray(no_of_colors)),
        'Inferno':get_reversed_list(inferno(no_of_colors)),
        'Magma':get_reversed_list(magma(no_of_colors)),
        'Viridis':get_reversed_list(viridis(no_of_colors)),
        'Turbo':get_reversed_list(turbo(no_of_colors)),
    }

    ## JS CALLBACK
    callback = CustomJS(args=dict(col_sch=COLOR_SCHEME,low=corr_min,high=corr_max,cir=cir,color_bar=color_bar), code="""
    // JavaScript code goes here
    var chosen_color = cb_obj.value;
    var color_mapper = new Bokeh.LinearColorMapper({palette:col_sch[chosen_color], low:low, high:high});
    cir.glyph.fill_color = {field: 'correlation', transform: color_mapper};
    color_bar.color_mapper.low = low;
    color_bar.color_mapper.high = high;
    color_bar.color_mapper.palette = col_sch[chosen_color];
    """)

    ## SELECT OPTION FOR INTERACTIVITY GIVEN TO USER
    # The initial value has to be one of the options; the lowercase
    # 'cividis' used before matched none of the COLOR_SCHEME keys.
    select = Select(title='Color Palette', value=default_scheme, options=list(COLOR_SCHEME.keys()), width=200, height=50)

    ## CALL BACK TO BE TRIGGERED WHENEVER USER SELECTS A COLOR PALETTE
    select.js_on_change('value', callback)

    ## GENERATION FINAL PLOT BY BINDING PLOT AND SELECT OPTION
    final_plot = layout([[select],[p]])
    curdoc().add_root(final_plot)
    output_file(path_to_save)
    save(final_plot)
    carry_bokeh_correction(path_to_save)
def query(output):  # pylint: disable=too-many-locals
    """
    Produce PCA scatter plots for consecutive principal components.

    Two series of plots are made, one labelled by whether the sample name
    contains 'TOB' and one labelled by sub-population. Each plot is written
    as a PNG under ``output`` and as a local HTML file that is then copied
    to ``output`` with ``gsutil``.

    :param output: destination prefix for the PNG files and the HTML copies
    """
    hl.init()

    mt = hl.read_matrix_table(HGDP1KG_TOBWGS)
    # Keep the samples whose inferred population is 'nfe' plus every TOB sample
    is_nfe = mt.hgdp_1kg_metadata.population_inference.pop == 'nfe'
    is_tob = mt.s.contains('TOB')
    mt = mt.filter_cols(is_nfe | is_tob)
    scores = hl.read_table(SCORES)
    mt = mt.annotate_cols(scores=scores[mt.s].scores)
    mt = mt.annotate_cols(TOB_WGS=mt.s.contains('TOB'))

    # Everything handed to one scatter plot must come from the same table
    columns = mt.cols()
    pca_scores = columns.scores
    labels = columns.TOB_WGS
    hover_fields = {'s': columns.s}

    # Percent variance explained, rounded to two decimals
    eigenvalues = hl.import_table(EIGENVALUES).to_pandas()
    eigenvalues.columns = ['eigenvalue']
    eigenvalues = pd.to_numeric(eigenvalues.eigenvalue)
    variance = (eigenvalues.divide(float(eigenvalues.sum())) * 100).round(2)

    number_of_pcs = len(eigenvalues)

    def axis_label(pc):
        """Axis title for the zero-based component ``pc``, with its variance share."""
        return f'PC{pc + 1} ({variance[pc]!s}%)'

    def export(plot, stem):
        """Write ``plot`` as PNG under ``output`` and as local HTML copied to ``output``."""
        png_path = f'{output}/{stem}.png'
        with hl.hadoop_open(png_path, 'wb') as f:
            get_screenshot_as_png(plot).save(f, format='PNG')
        html_name = f'{stem}.html'
        output_file(html_name)
        save(plot)
        # check=False: the exit status of the copy is not inspected
        subprocess.run(['gsutil', 'cp', html_name, output],
                       check=False)

    print('Making PCA plots labelled by study')
    for pc1 in range(number_of_pcs - 1):
        pc2 = pc1 + 1
        print(f'PC{pc1 + 1} vs PC{pc2 + 1}')
        p = hl.plot.scatter(
            pca_scores[pc1],
            pca_scores[pc2],
            label=labels,
            title='TOB-WGS',
            xlabel=axis_label(pc1),
            ylabel=axis_label(pc2),
            hover_fields=hover_fields,
        )
        export(p, f'study_pc{pc2}')

    print('Making PCA plots labelled by the subpopulation')
    labels = columns.hgdp_1kg_metadata.labeled_subpop
    pops = list(set(labels.collect()))

    for pc1 in range(number_of_pcs - 1):
        pc2 = pc1 + 1
        print(f'PC{pc1 + 1} vs PC{pc2 + 1}')
        # a fresh colour mapper is built for every plot
        subpop_colors = CategoricalColorMapper(palette=turbo(len(pops)),
                                               factors=pops)
        p = hl.plot.scatter(
            pca_scores[pc1],
            pca_scores[pc2],
            label=labels,
            title='Sub-Population',
            xlabel=axis_label(pc1),
            ylabel=axis_label(pc2),
            collect_all=True,
            colors=subpop_colors,
        )
        export(p, f'subpopulation_pc{pc2}')
예제 #10
0
파일: app.py 프로젝트: ZenRay/Python4Fun
y = X[:, 1]

# Order the points by ascending x and keep y aligned with them.
index = np.argsort(x)
x = np.sort(x)
y = y[index]

# Per-point placeholders, NaN until they are filled in.
pred = np.full(len(X), np.nan)
error = np.full(len(X), np.nan)
# Represents the points between the residuals
error_0s = [np.full(2, np.nan) for _ in range(len(X))]
error_1s = [np.full(2, np.nan) for _ in range(len(X))]
error_index = np.full(len(X), np.nan)
error_value = np.zeros(len(X))

error_colors = palettes.turbo(size)

# create datasource
source = ColumnDataSource(data={
    'x': x,
    'y': y,
    'pred': pred,
    'error_0s': error_0s,
    'error_1s': error_1s,
    'error_index': error_index,
    'error_value': error_value,
    'color': error_colors,
})

# Show the original points
plot = figure(
    plot_height=800,
    plot_width=800,
    title="不同参数变化残差变化",
    x_range=[min(min(x)*.1, min(x) * 1.5), max(max(x) * .2, max(x)*1.5)],
    y_range=[min(min(y)*.1, min(y) * 1.5), max(max(y) * .2, max(y)*1.5)],
)
plot.grid.visible = False

# Variance change information
var_plot = figure(plot_width=600, title="方差变化")

def draw_plot(name, color):
    """
    Add the series of one symbol to the module-level figure ``p``.

    Rows of the module-level ``data`` whose ``Symbol`` equals ``name`` are
    drawn as a line and as circles of 'open' against 'date', both in
    ``color`` and sharing the legend label ``name``. The legend settings are
    (re)applied on every call.

    :param name: symbol to select and to use as legend label
    :param color: colour for the line and the circles
    :return: the figure ``p``
    """
    selected = data.Symbol == name
    dates = data['date'][selected]
    opens = data['open'][selected]

    for add_glyph in (p.line, p.circle):
        add_glyph(dates, opens, color=color, legend_label=name)

    # The location of the legend labels is controlled by the location property
    legend = p.legend
    legend.location = "top_left"
    legend.click_policy = "hide"
    legend.title = 'Ticker'
    legend.title_text_font_style = "bold"
    legend.title_text_font_size = "15pt"
    return p


# Disabled threaded variant of the loop below, kept for reference:
# with concurrent.futures.ThreadPoolExecutor() as executor:
#     args = ((name, color) for name, color in zip(company_list, turbo(n)))
#     executor.map(lambda x: draw_plot(*x), args)

# Draw one series per company, pairing each name with a turbo(n) colour.
# zip stops at the shorter input; presumably n == len(company_list) — verify.
for name, color in zip(company_list, turbo(n)):
    draw_plot(name, color)

# Specify the name of the output file and show the result
output_file('materials.html')
show(p)
예제 #12
0
def get_data(company):
    """
    Return the rows of the module-level ``data`` for one company.

    The result is indexed by 'date'. When ``company`` occurs in
    ``company_list`` a constant ``color`` column is added, holding the
    ``turbo(n)`` colour paired with that name; otherwise no such column is
    added.

    :param company: value matched against ``data.Symbol``
    :return: the filtered, date-indexed frame
    """
    frame = data[data.Symbol == company].set_index('date')
    # later duplicates in company_list win, as they would in a full scan
    color_by_name = dict(zip(company_list, turbo(n)))
    if company in color_by_name:
        frame['color'] = color_by_name[company]
    return frame
예제 #13
0
파일: p2_tooltip.py 프로젝트: Axii99/CS490
TOOLS = "pan,wheel_zoom,reset,box_select,lasso_select,help"

data = pd.read_csv("factbook.csv")

# Hover fields; names containing spaces need the @{...} form.
TOOLTIPS = [("index", "$index"), ("Country", "@Country"),
            ("GDP per Capita", "@{GDP per capita}"),
            ("Life expectancy at birth", "@{Life expectancy at birth}"),
            ("Population", "@{Population}"), ("Birth rate", "@{Birth rate}")]

length = len(data.index)
plot1 = figure(tools=TOOLS, tooltips=TOOLTIPS, plot_width=800, plot_height=400)
plot2 = figure(tools=TOOLS, tooltips=TOOLTIPS, plot_width=800, plot_height=400)
plot3 = figure(tools=TOOLS, tooltips=TOOLTIPS, plot_width=800, plot_height=400)
plot4 = figure(tools=TOOLS, tooltips=TOOLTIPS, plot_width=800, plot_height=400)
colors = turbo(11)

# Column names are stripped of surrounding whitespace before use.
data = data.rename(columns=lambda x: x.strip())
# regex=False makes "$" and "," literal characters on every pandas version;
# as a regular expression "$" would be the end-of-string anchor.
GDPperCapita = data['GDP per capita'].str.replace(
    "$", "", regex=False).str.replace(
        ",", "", regex=False).str.strip().astype(float)
Life = np.array(data['Life expectancy at birth'].values)
population = data['Population'].str.replace(",", "", regex=False).astype(float)
# Rescale population linearly onto [5, 34]; the extremes are computed once
# instead of once per element.
_pop_min = min(population.values)
_pop_span = float(max(population.values) - _pop_min)
population = np.array([
    float(x - _pop_min) / _pop_span * 29.0 + 5
    for x in population.values
])
birthrate = np.array(data['Birth rate'])
# Map each birth rate onto one of the 11 palette entries (index 0..10).
_max_birthrate = max(birthrate)
colorlist = [colors[int((i / _max_birthrate) * 10)] for i in birthrate]
GDPperCapita = np.array(GDPperCapita.values)
예제 #14
0
def Electron_Energy_Graph_Old(conn):

    ############################################################################
    #################### CREATE THE DATA FOR THE GRAPH #########################

    output_file(
        "Electron_Output_Graph.html"
    )  #????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????

    # Use the connection passed to the function to read the data into a
    # dataframe via an SQL query.
    df = pd.read_sql('SELECT * FROM [eEnergyICP]', conn)
    print(df)

    # Delete cells where 'protocol id' is empty
    df = df.dropna(subset=['protocol id'])

    # With any luck this can be removed after chatting to AW and MB ?????????????????????????????????????????????????????????????????????????????????
    # Get the date and machinename from the protocol id' field
    # Seperate on the first '_'
    df_left = df['protocol id'].str.partition(sep='_')
    # Seperate on the last '_'
    df_right = df['protocol id'].str.rpartition(sep='_')
    # From these sperated dataframes add the appropriate columns back into the
    # main dataframe.
    df.loc[:, 'adate'] = df_left[0]
    df.loc[:, 'machinename'] = df_right[2]

    # Turn 'adate' into datetime. Problems with this function as it assumes american date formats over british. ?????????????????????????????????????????????????????????????????????????????????
    # Talk to AW and MB about getting date from other tables in the database and pulling them into the query. ???????????????????????????????????????????????????????????????????????????????????
    # This way the date should be in a set format that the datetime function can be told, which should resolve this issue. ??????????????????????????????????????????????????????????????????????
    #
    # Need to turn the date fields into a Dateime object (either 'adate'
    # (protons) or the newly created 'adate' (photons)). The date field should
    # always be named 'adate' for consistency.
    df.loc[:, 'adate'] = pd.to_datetime(df.loc[:, 'adate'])

    # When starting a new graph can be useful to print the dataframe after any
    # manipulations to make sure the code has done what you expected.
    print(df)

    # Create a list of the fields using the dataframe
    TableFields = (list(df.columns))

    ############################################################################
    ############################################################################

    ############################################################################
    ################ CREATE THE DATAFRAME FOR THE TOLERANCES ###################

    # The purpose of this plot is generally to be as general as possible but
    # there are only a few parameters that will have defined tolerances.
    # Therefore the tolerance section can be a bit more specific and a dataframe
    # containing tolereances can be manually created for many cases and
    # extracted from the database in others (in a manner similar to above but
    # calling from a different table/query with the SQL statement)
    #
    # The format of the dataframe should be the first line being the x_axis
    # (with some values taken from the main dataframe to get the right
    # formatting). The subsequent columns are the tolerances [low, high].
    # NB: column names should match those from the main dataframe.
    df_tol1 = pd.read_sql('SELECT * FROM [ElectronFWHMLimits]', conn)
    print(df_tol1)
    df_tol1 = df_tol1.set_index('class')
    print(df_tol1)

    df_tol_TB = pd.DataFrame({
        'adate': [df['adate'].max(), df['adate'].max()],
        '6fwhm':
        [df_tol1.loc['TBUCLH', 'lower6'], df_tol1.loc['TBUCLH', 'upper6']],
        '9fwhm':
        [df_tol1.loc['TBUCLH', 'lower9'], df_tol1.loc['TBUCLH', 'upper9']],
        '12fwhm':
        [df_tol1.loc['TBUCLH', 'lower12'], df_tol1.loc['TBUCLH', 'upper12']],
        '15fwhm':
        [df_tol1.loc['TBUCLH', 'lower15'], df_tol1.loc['TBUCLH', 'upper15']]
    })
    print(df_tol_TB)

    df_tol_Classic = pd.DataFrame({
        'adate': [df['adate'].max(), df['adate'].max()],
        '6fwhm':
        [df_tol1.loc['Classic', 'lower6'], df_tol1.loc['Classic', 'upper6']],
        '9fwhm':
        [df_tol1.loc['Classic', 'lower9'], df_tol1.loc['Classic', 'upper9']],
        '12fwhm':
        [df_tol1.loc['Classic', 'lower12'], df_tol1.loc['Classic', 'upper12']],
        '16fwhm':
        [df_tol1.loc['Classic', 'lower16'], df_tol1.loc['Classic', 'upper16']],
        '20fwhm':
        [df_tol1.loc['Classic', 'lower20'], df_tol1.loc['Classic', 'upper20']]
    })
    print(df_tol_Classic)

    ############################################################################
    ############################################################################

    ##########################################################################
    ################### CREATE THE COLUMNS FOR THE LEGEND ######################

    # NB: The following section has been designed to be as general as possible
    # but in reality it might be preferable to more manually choose the markers
    # and colors based on optimising the most likey things to be plotted.
    #
    # This code is a way of creating a legend with markers based on one
    # parameter (e.g. machine name) and color on another parameter (e.g. energy)

    ######### Colors:
    # Create a sorted list of the unique values in a dataframe column that the
    # colors will be based on.
    list_forcolor = sorted(df['machinename'].unique().tolist())
    # If the length of the list is <9 then we can use the colorblind palette,
    # which contains 8 colors. This should be the default for accessability
    # reasons unless there are compeling reasons otherwise.
    if len(list_forcolor) < 9:
        color_palette = Colorblind[len(list_forcolor)]
    # If not <9 then we can use the much larger Turbo palette which contains
    # 256 colors. Will check if there are more than 256 options though and
    # throw an error if so.
    elif len(list_forcolor) > 256:
        print( 'Error - Name of Function: >256 unique energies in database ' \
          'causing failure of the turbo color palette function (only ' \
             '256 availible colors.' )
        exit()
    # If it passes the check then use the built in turbo function that splits
    # the turbo palette into roughly equal sections based on a supplied integer
    # number.
    else:
        color_palette = turbo(len(list_forcolor))

    ######### Markers:
    # Doesn't seem to be a way to create a simple list of all the Bokeh marker
    # options so just do this manually. May want to re-order to improve
    # visibility of commonly used options.
    markers = [
        'asterisk', 'circle', 'circle_cross', 'circle_x', 'cross', 'dash',
        'diamond', 'diamond_cross', 'hex', 'inverted_triangle', 'square',
        'square_cross', 'square_x', 'triangle', 'x'
    ]
    # Create a sorted list of the unique values in a dataframe column that the
    # markers will be based on.
    list_formarker = sorted(df['machinename'].unique().tolist())
    # Check that there are enough markers to give a unique marker to each option
    # but otherwise throw an error.
    if len(list_formarker) > len(markers):
        print( 'Error - Name of Function: Not enough markers to assign a ' \
            'unique marker to each option.' )
        exit()

    ######### Legend Key:
    # Create a function that will be used to run through the dataframe looking
    # at the energy and machine column and creating a new column that will have
    # values for both seperated by a '_', stored as a string.
    def add_legend(row):
        return str(str(row['machinename']))

    # Run the function and also copy the other columns into new columns so that
    # when ther are renamed to 'x' and 'y' later they are still availible for
    # the legend if needed.
    df.loc[:, 'legend'] = df.apply(lambda row: add_legend(row), axis=1)
    df.loc[:, 'machinename1'] = df.loc[:, 'machinename']
    print(df)

    ############################################################################
    ############################################################################

    ############################################################################
    ################## FORMATTING AND CREATING A BASIC PLOT ####################

    ############################################################################
    ############################# USER INPUTS ##################################

    # Decide what the default viewing option is going to be. (i.e. the fields to
    # be plotted on the x and y axis when the graph is opened, the plot size
    # etc.).

    # From the legend defined above give the values that will be pre-ticked when
    # the plot is opened
    color_to_plot = ['TrueBeam B', 'TrueBeam C']
    marker_to_plot = color_to_plot

    # Decide on what data to plot on the x/y axis when opened.
    x_data1 = 'adate'
    y_data1 = '6fwhm'
    # Decide what the plot formatting will be, inluding the plot title, axis
    # titles and the size of the plot.
    plot_title1 = 'Electron Energy'
    x_axis_title1 = x_data1
    y_axis_title1 = y_data1
    plot_size_height1 = 450
    plot_size_width1 = 800
    legend_location = 'bottom_left'

    # Create a list of the plot parameters that will be used as input to a
    # function later.
    list_plot_parameters = [
        x_data1, y_data1, plot_title1, x_axis_title1, y_axis_title1,
        plot_size_height1, plot_size_width1, legend_location
    ]

    ############################################################################
    ############################################################################

    ############################################################################
    ########################### CREATE THE PLOT ################################

    # Create the actual ploy. Generally it's a good idea to do this by defining
    # functions as they can then be used in the callbacks later without having
    # a lot of redundant very similar code.

    ######### Make Dataset:
    # Define a make dataset function that can be used now but also called later
    # in the callback functions to save re-writing similar code later.
    def make_dataset(color_to_plot, marker_to_plot, x_data1, y_data1):
        # Create a sub dataframe
        Sub_df1 = df.copy()
        # Delete any rows in the sub-dataframes that do not exist in the
        # checkboxes/default user choices. (e.g. if you've selected 6MV in the
        # checkbox this will remove any rows that have something other than 6MV)
        Sub_df1 = Sub_df1[Sub_df1['machinename'].isin(color_to_plot)]
        Sub_df1 = Sub_df1[Sub_df1['machinename'].isin(marker_to_plot)]
        # Search for the columns with the x_data and y_data names and replace
        # them with 'x' and 'y'. Unless plotting the same data on both in which
        # case add an extra column for 'y' that's a copy of 'x'
        if x_data1 == y_data1:
            Sub_df1.rename(columns={x_data1: 'x'}, inplace=True)
            Sub_df1.loc[:, 'y'] = Sub_df1.loc[:, 'x']
        else:
            Sub_df1.rename(columns={x_data1: 'x'}, inplace=True)
            Sub_df1.rename(columns={y_data1: 'y'}, inplace=True)
        # Return the newly created Sub_df1
        return Sub_df1

    # Run the make_dataset function to create a sub dataframe that the plot will
    # be made from.
    Sub_df1 = make_dataset(color_to_plot, marker_to_plot, x_data1, y_data1)

    # Create a Column Data Source. This is important as it is the data format
    # needed for Bokeh. When making this it is useful to convert the dataframe
    # into a dictionary, which seems to help with the callback function (see
    # 'Common Issues' for details).
    src1 = ColumnDataSource(Sub_df1.to_dict(orient='list'))

    ######### Make Plot:
    # Create an empty plot (plot parameters will be applied later in a way that
    # can be manipulated in the callbacks)
    p1 = figure()
    # Create a scatter plot.
    p1.scatter(  # source = The ColumnDataSource made above.
        source=src1,
        # x/y = 'x'/'y' which are fields that were renamed as such in
        # the make_dataset function
        x='x',
        y='y',
        # Some general parameters about marker size. These seem like
        # reasonable values but possible could alter these in a
        # callback?
        fill_alpha=0.4,
        size=12,
        # Create the legend using the created fields added in the legend
        # section. Use the factor_mark and factor_cmap functions to
        # match the colors/markers to the right lists.
        # NB: Always use legend_field for this not legend_group as the
        # former acts on the javascript side but the latter the Python
        # side. Therefore the former will update automatically when the
        # plot is changed with no need for a callback.
        legend_field='legend',
        marker=factor_mark('machinename1', markers, list_formarker),
        color=factor_cmap('machinename1', color_palette, list_forcolor))

    ######### Add plot parameters:
    # Define a define plot parameters factor that can be used now but also
    # called later in the callback functions.
    def define_plot_parameters(list):

        # Input is a List of format:
        # list_plot_parameters = [	x_data1, y_data1,
        # 	 						plot_title1, x_axis_title1, y_axis_title1,
        # 							plot_size_height1, plot_size_width1,
        # 							legend_location	]

        # The parameters have to be controlled like this in a callback to allow
        # for them to be adjusted. Otherwise the plot parameters are not
        # interactive.
        # 	Yes!	- p1.xaxis.axis_label = 'X_axis_title'
        # 	No! 	- p1 = figure(x_axis_label = 'X_axis_title')
        p1.title.text = list[2]
        p1.xaxis.axis_label = list[3]
        p1.yaxis.axis_label = list[4]
        p1.plot_height = list[5]
        p1.plot_width = list[6]
        p1.legend.location = list[7]

        # If the user wants to plot an axis as datetime then the axis needs to
        # be reformatted. Will do this by checking if the x_data1/y_data1 is
        # =='adate'.
        # NB: This only works if 'adate' is used as the name for the date column
        # and also that this is the only date column.
        if list[0] == 'adate':
            p1.xaxis.formatter = DatetimeTickFormatter(days=['%d/%m', '%a%d'])
        else:
            p1.xaxis.formatter = BasicTickFormatter()
        if list[1] == 'adate':
            p1.yaxis.formatter = DatetimeTickFormatter(days=['%d/%m', '%a%d'])
        else:
            p1.yaxis.formatter = BasicTickFormatter()

        return

    # Run the define_plot_parameters function to format the plot.
    define_plot_parameters(list_plot_parameters)

    ############################################################################
    ############################################################################

    ############################################################################
    ############################################################################

    ############################################################################
    ############################ ADD TOLERANCES ################################

    # We defined the tolerances further up and now want to add the correct ones
    # to the plot (having created the plot above). Again this will be done with
    # functions and in a way that the functions can be used in the callbacks
    # later.
    #
    # NB: At the moment this is still a bit of a work in progress and shows the
    # way to add line tolerances. Another option might be to add colorblocks
    # using varea and/or varea_stack.
    #
    # NB: Also this function assumes that tolerances will all be against one
    # x_axis value (e.g. date). This is probably the majority of use cases but
    # it is probably relatively trivial to add further tolerances against other x_axis
    # data.

    # Create a function that will create a dataframe that can be used to supply
    # a plot of two tolerance lines. This will include 'appearing' and
    # 'disappearing' depending on whether tolerances are defined or not.

    def tolerances(x_data1, y_data1, Sub_df1, df_tol1):
        """Build the two-row dataframe used to draw a pair of tolerance lines.

        Parameters:
            x_data1: Name of the field currently plotted on the x-axis.
            y_data1: Name of the field currently plotted on the y-axis.
            Sub_df1: Dataframe of the plotted points; its 'x' and 'y' columns
                are read to position the lines.
            df_tol1: Tolerance table. Its first column names the x-axis field
                that all tolerances are defined against. For every other
                column the first row is read as the low tolerance and the
                second row as the high tolerance (by position, so the table's
                index labels do not matter).

        Returns:
            A dataframe with columns 'x', 'y_low' and 'y_high' and two rows.
            When tolerances exist for (x_data1, y_data1) the rows are the
            left and right ends of two horizontal lines that span the plotted
            x-range plus a margin. Otherwise both rows are the single point
            (max x, max y) of Sub_df1, which should not draw anything visible
            and does not alter the viewing range.
        """

        def hidden_lines():
            # Collapse both lines onto one existing data point so nothing
            # visible is drawn and the plot range is left alone.
            x_top = Sub_df1['x'].max()
            y_top = Sub_df1['y'].max()
            return pd.DataFrame({
                'x': [x_top, x_top],
                'y_low': [y_top, y_top],
                'y_high': [y_top, y_top]
            })

        # Get a list of the column headers from the tolerance table.
        headers1 = df_tol1.columns.values.tolist()

        # All tolerances are defined against the first column, so there is
        # nothing to plot if that is not what is on the x-axis (or if the
        # table has no columns at all).
        if not headers1 or x_data1 != headers1[0]:
            return hidden_lines()

        # Drop the x column from the candidates so that the small case where
        # x_data1 == y_data1 is treated as "no tolerance defined".
        headers1.remove(x_data1)
        if y_data1 not in headers1:
            return hidden_lines()

        if x_data1 == 'adate':
            # Datetime axis: extend the plotted range by a couple of weeks on
            # either side so the lines cover the full range (plus a little
            # bit for visualisation reasons).
            max_x = Sub_df1['x'].max() + pd.DateOffset(weeks=2)
            min_x = Sub_df1['x'].min() - pd.DateOffset(weeks=2)
        else:
            # Any other axis: extend by 5% of the range on either side.
            # NB: This has not been checked extensively as most tolerances
            # are vs. time.
            max_x = Sub_df1['x'].max()
            min_x = Sub_df1['x'].min()
            padding = (max_x - min_x) / 20
            max_x = max_x + padding
            min_x = min_x - padding

        # Output a dataframe of the form:
        #   x = [far left of plot, far right of plot]
        #   y_low = [low_tolerance, low_tolerance]
        #   y_high = [high_tolerance, high_tolerance]
        limits = df_tol1[y_data1]
        low_tolerance = limits.iloc[0]
        high_tolerance = limits.iloc[1]
        return pd.DataFrame({
            'x': [min_x, max_x],
            'y_low': [low_tolerance, low_tolerance],
            'y_high': [high_tolerance, high_tolerance]
        })

    def choose_tolerances(x_data1, y_data1, Sub_df1, color_to_plot):
        """Return the (TrueBeam, Classic) tolerance-line dataframes.

        For each machine family, if any of its machine names appears in
        color_to_plot the dataframe comes from tolerances() using that
        family's tolerance table (df_tol_TB or df_tol_Classic). Otherwise a
        placeholder is returned whose two rows both sit at the
        (max x, max y) point of Sub_df1.
        """
        truebeam_names = ['TrueBeam B', 'TrueBeam C', 'TrueBeam D',
                          'TrueBeam F']
        classic_names = ['Linac B', 'Linac C', 'Linac D', 'Linac E']

        def is_selected(machine_names):
            # True when at least one machine of the family is being plotted.
            return any(name in color_to_plot for name in machine_names)

        def placeholder():
            # Two identical rows at the largest plotted x and y values.
            return pd.DataFrame({
                'x': [Sub_df1['x'].max(), Sub_df1['x'].max()],
                'y_low': [Sub_df1['y'].max(), Sub_df1['y'].max()],
                'y_high': [Sub_df1['y'].max(), Sub_df1['y'].max()]
            })

        # TrueBeam machines use the df_tol_TB tolerances.
        if is_selected(truebeam_names):
            truebeam_lines = tolerances(x_data1, y_data1, Sub_df1, df_tol_TB)
        else:
            truebeam_lines = placeholder()

        # Classic linacs use the df_tol_Classic tolerances.
        if is_selected(classic_names):
            classic_lines = tolerances(x_data1, y_data1, Sub_df1,
                                       df_tol_Classic)
        else:
            classic_lines = placeholder()

        return truebeam_lines, classic_lines

    # Run the tolerances function to output the new dataframe
    Sub_df1_tol_TB, Sub_df1_tol_Classic = choose_tolerances(
        x_data1, y_data1, Sub_df1, color_to_plot)

    # Turn the dataframe into a new ColumnDataSource (again turning it into a
    # dictionary)
    src1_tol_TB = ColumnDataSource(Sub_df1_tol_TB.to_dict(orient='list'))
    src1_tol_Classic = ColumnDataSource(
        Sub_df1_tol_Classic.to_dict(orient='list'))

    # Add two lines to the plot using the new ColumnDataSource as the source,
    # one line for low tolerance and one line for high.
    p1.line(source=src1_tol_TB, x='x', y='y_low', color='firebrick')
    p1.line(source=src1_tol_TB, x='x', y='y_high', color='firebrick')
    p1.line(source=src1_tol_Classic, x='x', y='y_low', color='hotpink')
    p1.line(source=src1_tol_Classic, x='x', y='y_high', color='hotpink')

    ############################################################################
    ############################################################################

    ############################################################################
    ################## ADD MORE COMPLEX TOOLS TO THE PLOT ######################

    # Create tools here that will allow for some manipulation or inspection of
    # plotted data.
    #
    # As an example a 'HoverTool' will be added to the plot.
    #
    # Other useful tools and details of the syntax can be found here:
    # https://docs.bokeh.org/en/latest/docs/user_guide/tools.html

    # Create the hover tool (see website above for syntax/details).
    # This example creates a hover tool that displays:
    # 	Date: 			The value of the data-point as measured on the x-axis
    # 					(formatted for datetime)
    # 	Y-Axis:			The value of the data-point as measured on the y-axis
    # 	(x,y):			The x and y co-ordinates in plot space
    # 	Chamber Comb.:	The data stored under the 'Chamber' column for that
    # 					data-point.
    # 	Comments:		The data stored under the 'comments' column for that
    #					data-point.
    hover = HoverTool(tooltips=[('Date', '@x{%F}'), ('Y-Axis', '@y'),
                                ('(x,y)', '($x, $y)'),
                                ('Chamber Comb.', '@Chamber'),
                                ('Comments', '@comments')],
                      formatters={'x': 'datetime'})

    # Add the newly created tool to the plot.
    p1.add_tools(hover)

    ############################################################################
    ############################################################################

    ############################################################################
    ################# CREATE WIDGETS TO BE ADDED TO THE PLOT ###################

    # Create widgets here that will allow for some manipulation of the plotted
    # data. These widgets provide an interactive ability that can alter the data
    # that is plotted, provide update functions and call other programmatic
    # functions. This is done either using built in Bokeh functionality or
    # using more powerful but complex python and javascript based callbacks.
    #
    # As an example some 'Select' widgets, 'Checkbox' widgets and 'RangeSliders'
    # will be added to the plot.
    #
    # Other useful widgets and details of the syntax can be found here:
    # https://docs.bokeh.org/en/latest/docs/user_guide/interaction/widgets.html

    ######## 1)
    # Create the select widget (see website above for syntax/details). This
    # widget will be used for the callback example later to change data plotted
    # on the x/y-axis.
    # This example creates a select tool that displays:
    # 	Dropdown list containing a list of every field that was downloaded from
    # 	the database.
    # 	NB: 	When making a list it may be worth manually creating it to limit
    # 			it to the fields that can be plotted (e.g. not including fields
    # 			like 'Comments'). This will shorten the dropdown list but you
    # 			should err on the side of inclusion to make the final plot as
    # 			flexible as possible.
    #
    # Create a list of the available options
    menu_axis = []
    for field in TableFields:
        menu_axis.append(field)
    menu_axis = sorted(menu_axis)
    # Select tool needs inputs for the title, a starting value and the just
    # created list to supply the available options.
    select_xaxis = Select(title='X-Axis Fields Available:',
                          value=x_axis_title1,
                          options=menu_axis)
    select_yaxis = Select(title='Y-Axis Fields Available:',
                          value=y_axis_title1,
                          options=menu_axis)

    ######## 2)
    # This select widget will be made in the same way and used to create a
    # dropdown list to change the legend position.
    #
    # Create a list of the available options
    menu_legend = [
        'top_left', 'top_center', 'top_right', 'center_left', 'center',
        'center_right', 'bottom_left', 'bottom_center', 'bottom_right'
    ]
    # Create the select tool as above
    select_legend = Select(title='Legend Position',
                           value=legend_location,
                           options=menu_legend)

    ######## 3)
    # These checkbox widgets will be used to create a tool to select the
    # values that are being plotted from the fields that the legend is based on.
    #
    # NB: There is some built in Bokeh functionality for interactive legends
    # that can fulfill some of the same goals where the number of options is
    # limited to something that can display on a reasonably sized legend. May
    # be a better and more robust solution where possible.

    # Create a list of all unique names in the column chosen to be matched to
    # markers (sorted).
    options_marker = sorted(df['machinename'].unique().tolist())
    # Create an index list for all of the values that should be pre-ticked.
    index_marker = [
        i for i in range(len(options_marker))
        if options_marker[i] in marker_to_plot
    ]
    # Create the checkbox, providing the list of available options and a list
    # of what should be active (pre-ticked).
    checkbox_marker = CheckboxGroup(labels=options_marker,
                                    active=index_marker,
                                    visible=False)

    # Do the same for the column that was matched to colors.
    options_color = sorted(df['machinename'].unique().tolist())
    index_color = [
        i for i in range(len(options_color))
        if options_color[i] in color_to_plot
    ]
    checkbox_color = CheckboxGroup(labels=options_color, active=index_color)

    ######## 4)
    # Make some range sliders that will be used to manipulate the x-axis and
    # y-axis range.

    # Most of the manipulation will be done using a later function but will need
    # to create the bare minimum rangeslider first that can later be manipulated
    # (This seems to be the minimum number of parameters needed to create these
    # widgets). Note that a RangeSliders AND a DateRangeSlider needs to be
    # created for each axis.
    range_slider_x = RangeSlider(title='X-Axis Range',
                                 start=0,
                                 end=1,
                                 value=(0, 1),
                                 step=0.1)
    range_slider_y = RangeSlider(title='Y-Axis Range',
                                 start=0,
                                 end=1,
                                 value=(0, 1),
                                 step=0.1)
    range_slider_xdate = DateRangeSlider(title='X-Axis Range (Date)',
                                         start=date(2017, 1, 1),
                                         end=date(2017, 1, 2),
                                         value=(date(2017, 1,
                                                     1), date(2017, 1, 2)),
                                         step=1)
    range_slider_ydate = DateRangeSlider(title='Y-Axis Range (Date)',
                                         start=date(2017, 1, 1),
                                         end=date(2017, 1, 2),
                                         value=(date(2017, 1,
                                                     1), date(2017, 1, 2)),
                                         step=1)

    # Define the function that will be used now and also in the callbacks later.
    # This will allow the range_sliders to adjust to match any changes in the
    # data being plotted on the x/y axis.
    def range_slider(x_data1, y_data1, Sub_df1):
        """Re-fit the axis range sliders to the data being plotted.

        For each axis the slider matching the plotted field is set to span
        the full min-max range of the corresponding Sub_df1 column and is
        made visible, while its counterpart is hidden. The DateRangeSlider
        is used when the field is 'adate' and the normal RangeSlider is used
        otherwise. The y-axis is handled first, then the x-axis.
        """
        axes = (
            (y_data1, 'y', range_slider_y, range_slider_ydate),
            (x_data1, 'x', range_slider_x, range_slider_xdate),
        )

        for field, column, numeric_slider, date_slider in axes:
            lowest = Sub_df1[column].min()
            highest = Sub_df1[column].max()

            # Pick which of the two sliders for this axis is shown.
            uses_dates = field == 'adate'
            if uses_dates:
                shown, hidden = date_slider, numeric_slider
            else:
                shown, hidden = numeric_slider, date_slider

            # Set the start, end and value fields to the full range.
            shown.start = lowest
            shown.end = highest
            shown.value = (lowest, highest)
            # A step of 1 works for the DateRangeSlider; for the normal
            # RangeSlider range/100000 should give sufficient granularity.
            shown.step = 1 if uses_dates else (highest - lowest) / 100000
            shown.visible = True
            hidden.visible = False

        return

    # Run the function.
    range_slider(x_data1, y_data1, Sub_df1)

    ############################################################################
    ############################################################################

    ############################################################################
    ########################### CREATE A LAYOUT ################################

    # Create a layout to add widgets and arrange the display. This simple layout
    # displays the select widgets above the plot with the checkboxes to the
    # right (one above the other).
    #
    # More details can be found at:
    # https://docs.bokeh.org/en/latest/docs/user_guide/layout.html
    #
    # NB: More work to do here to make plots responsive to browser window size
    # (e.g. using sizing_mode = scale_both) but need to investigate with/without
    # remote desktops.

    layout_checkbox = column([checkbox_marker, checkbox_color])
    layout_plots = column([
        select_xaxis, select_yaxis, select_legend, range_slider_x,
        range_slider_y, range_slider_xdate, range_slider_ydate, p1
    ])

    tab_layout = row([layout_plots, layout_checkbox])

    ############################################################################
    ############################################################################

    ############################################################################
    ####################### CREATE CALLBACK FUNCTIONS ##########################

    # CAVEAT: Callback functions are very complex and below is my (CB) rough
    # understanding of how they function based mainly on experience/trial and
    # error while writing these functions for other graphs. It should be taken
    # as a starting point but not as a definitive user guide.
    #
    # Callback functions are very powerful and can be based off of javascript or
    # python. The example presented here uses python but in future a javascript
    # copy should also be added.

    ######## 1)
    # This callback is designed to take inputs from the select and checkbox
    # widgets update the graph to plot the new data requested by the user.
    #
    # Syntax:
    # 	attr = 	The value passed from the on_change function before the callback
    # 			was named (e.g. in this example attr = 'value')
    # 	old = 	The value of the widget before it was changed (I.e. If a select
    # 			widget is changed from 'Output' to 'T/P Correction', then
    # 			old = 'Output'
    # 	new = 	The value of the widget after it was changed (I.e. If a select
    # 			widget is changed from 'Output' to 'T/P Correction', then
    # 			new = 'T/P Correction'
    #
    # 	NB: In general seen little need to use these inputs as you can generally
    # 	access the value of the widgets directly which seems to be more powerful
    # 	and flexible
    #
    # First define the callback function.
    def callback(attr, old, new):
        """Re-plot using the current state of the select/checkbox widgets.

        The attr/old/new arguments supplied by on_change are not used; the
        widget values are read directly instead. The marker selection is
        taken to be the same as the colour selection (checkbox_marker is not
        consulted here).
        """
        # Labels of every ticked box in the colour checkbox group.
        selected = []
        for index in checkbox_color.active:
            selected.append(checkbox_color.labels[index])
        x_field = select_xaxis.value
        y_field = select_yaxis.value
        legend_position = select_legend.value

        # Build a new version of the sub dataframe for the new selection.
        refreshed_df = make_dataset(selected, selected, x_field, y_field)

        # Update the plot parameters; the axis titles are simply the names of
        # the fields being plotted.
        define_plot_parameters([
            x_field, y_field, plot_title1, x_field, y_field,
            plot_size_height1, plot_size_width1, legend_position
        ])

        # Build new versions of the tolerance sub dataframes.
        tol_truebeam, tol_classic = choose_tolerances(
            x_field, y_field, refreshed_df, selected)

        # Make the range sliders' start/end etc. match what is now plotted
        # and show/hide the RangeSlider/DateRangeSlider as needed.
        range_slider(x_field, y_field, refreshed_df)

        # The glyphs look to these ColumnDataSources, so replacing their data
        # changes what is being plotted.
        src1.data = refreshed_df.to_dict(orient='list')
        src1_tol_TB.data = tol_truebeam.to_dict(orient='list')
        src1_tol_Classic.data = tol_classic.to_dict(orient='list')

        return

    # Use the on_change function to call the now defined callback function
    # whenever the user changes the value in the widget.
    # NB: Other functions such as on_click are available for other widgets.
    # Syntax:
    # 	First argument is passed to the callback as attr (see callback section
    # 	above)
    # 	Second argument is the name of the callback function to be called.
    select_xaxis.on_change('value', callback)
    select_yaxis.on_change('value', callback)
    select_legend.on_change('value', callback)
    checkbox_color.on_change('active', callback)
    checkbox_marker.on_change('active', callback)

    ######## 2)
    # This callback is designed to take inputs from the range sliders to change
    # visible range
    def callback_range(attr, old, new):
        """Set the visible x/y range of p1 from the range sliders.

        The attr/old/new arguments supplied by on_change are not used. For
        each axis the DateRangeSlider is read when the plotted field is
        'adate' and the normal RangeSlider is read otherwise.
        """
        # Check what is currently being plotted to know which slider of each
        # pair holds the values to use.
        x_is_date = select_xaxis.value == 'adate'
        y_is_date = select_yaxis.value == 'adate'

        # NB: slider.value = (left_value, right_value)
        x_slider = range_slider_xdate if x_is_date else range_slider_x
        p1.x_range.start, p1.x_range.end = x_slider.value

        y_slider = range_slider_ydate if y_is_date else range_slider_y
        p1.y_range.start, p1.y_range.end = y_slider.value

        return

    # Use the on_change function to call the now defined callback function
    # whenever the user changes the value in the widget.
    range_slider_x.on_change('value', callback_range)
    range_slider_y.on_change('value', callback_range)
    range_slider_xdate.on_change('value', callback_range)
    range_slider_ydate.on_change('value', callback_range)

    ############################################################################
    ############################################################################

    ############################################################################
    ####################### RETURN TO THE MAIN SCRIPT ##########################

    # Now that the script is finished and the plot created we can return to the
    # main script.
    #
    # To pass back the data for the tab we need to return a Panel with:
    # 	child = layout (the one that we made earlier with the widget and plot)
    # 	title = 'Something that makes sense as a tab label for the user'

    return Panel(child=tab_layout, title='Electron Energy')
예제 #15
0
def query():
    """Query script entry point.

    Reads the HGDP/1kG + TOB-WGS matrix table and the PCA scores table, then
    for every pair of consecutive principal components writes a scatter plot
    as both PNG and HTML: once coloured by study and once coloured by
    subpopulation. Output files are named '<prefix>_pc<k>.png/.html' under
    the 'web' output category.
    """

    hl.init(default_reference='GRCh38')

    mt = hl.read_matrix_table(HGDP1KG_TOBWGS)
    scores = hl.read_table(SCORES)

    # Filter outliers and related samples
    mt = mt.semi_join_cols(scores)
    mt = mt.annotate_cols(scores=scores[mt.s].scores)
    mt = mt.annotate_cols(
        study=hl.if_else(mt.s.contains('TOB'), 'TOB-WGS', 'HGDP-1kG'))

    # PCA plot must all come from the same object
    columns = mt.cols()
    pca_scores = columns.scores
    tooltips = [('labels', '@label'), ('samples', '@samples')]

    # Collect the per-sample values once up front instead of once per plot;
    # they are the same for every pair of PCs.
    study_labels = columns.study.collect()
    sample_names = columns.s.collect()
    # Sorted so that the factor -> colour mapping is the same on every run
    # (iteration order of a set of strings is not stable between runs).
    cohort_sample_codes = sorted(set(study_labels))

    # get percent variance explained
    eigenvalues = hl.import_table(EIGENVALUES)
    eigenvalues = eigenvalues.to_pandas()
    eigenvalues.columns = ['eigenvalue']
    eigenvalues = pd.to_numeric(eigenvalues.eigenvalue)
    variance = eigenvalues.divide(float(eigenvalues.sum())) * 100
    variance = variance.round(2)

    # Get number of PCs
    number_of_pcs = len(eigenvalues)

    collected_scores = {}

    def scores_for(pc):
        # Every PC is used by up to two plots per labelling, so remember the
        # collected values rather than collecting them again.
        if pc not in collected_scores:
            collected_scores[pc] = pca_scores[pc].collect()
        return collected_scores[pc]

    def write_plots(title, file_prefix, point_labels, palette, factors):
        # Write one PNG and one HTML plot per pair of consecutive PCs, with
        # points coloured by `point_labels` using `palette` over `factors`.
        for pc1 in range(number_of_pcs - 1):
            pc2 = pc1 + 1
            print(f'PC{pc1 + 1} vs PC{pc2 + 1}')
            plot = figure(
                title=title,
                x_axis_label=f'PC{pc1 + 1} ({variance[pc1]}%)',
                y_axis_label=f'PC{pc2 + 1} ({variance[pc2]}%)',
                tooltips=tooltips,
            )
            source = ColumnDataSource(
                dict(
                    x=scores_for(pc1),
                    y=scores_for(pc2),
                    label=point_labels,
                    samples=sample_names,
                ))
            plot.circle(
                'x',
                'y',
                alpha=0.5,
                source=source,
                size=4,
                color=factor_cmap('label', palette, factors),
                legend_group='label',
            )
            plot.add_layout(plot.legend[0], 'left')
            plot_filename = output_path(f'{file_prefix}_pc{pc2}.png', 'web')
            with hl.hadoop_open(plot_filename, 'wb') as f:
                get_screenshot_as_png(plot).save(f, format='PNG')
            html = file_html(plot, CDN, 'my plot')
            plot_filename_html = output_path(f'{file_prefix}_pc{pc2}.html',
                                             'web')
            with hl.hadoop_open(plot_filename_html, 'w') as f:
                f.write(html)

    print('Making PCA plots labelled by study')
    write_plots('TOB-WGS + HGDP/1kG Dataset', 'study', study_labels,
                ['#1b9e77', '#d95f02'], cohort_sample_codes)

    print('Making PCA plots labelled by the subpopulation')
    subpop_labels = columns.hgdp_1kg_metadata.labeled_subpop.collect()
    # Samples without a labelled subpopulation are treated as TOB-WGS.
    subpop_labels = ['TOB-WGS' if x is None else x for x in subpop_labels]
    unique_subpops = set(subpop_labels)
    # Sorted for a reproducible colour assignment, with TOB-WGS moved to the
    # end of the ordering (as the original ordering intended). TOB-WGS is
    # only appended when present, so a dataset without any such sample no
    # longer fails here.
    subpopulation = sorted(unique_subpops - {'TOB-WGS'})
    if 'TOB-WGS' in unique_subpops:
        subpopulation.append('TOB-WGS')

    write_plots('Subpopulation', 'subpopulation', subpop_labels,
                turbo(len(subpopulation)), subpopulation)
예제 #16
0
def Create_Legend(df, color_column, custom_color_boolean, custom_color_palette,
                  marker_column, custom_marker_boolean, custom_marker_palette):
    """Build the color/marker lookups and the legend columns for a plot.

    Args:
        df: pandas DataFrame holding the data. It is modified in place: the
            columns 'legend', 'color1' and 'marker1' are added (or
            overwritten) and the same object is returned.
        color_column: Name of the column whose unique values each get a color.
        custom_color_boolean: Truthy to take colors from
            ``custom_color_palette``; falsy to use the built-in defaults.
        custom_color_palette: Only read when ``custom_color_boolean`` is
            truthy. May be a plain function called with the number of unique
            values (e.g. ``turbo``), a dict keyed by palette size (e.g.
            ``Colorblind``), or a list/tuple of colors.
        marker_column: Name of the column whose unique values each get a
            marker.
        custom_marker_boolean: Truthy to use ``custom_marker_palette`` as the
            marker palette; falsy to use the built-in marker list.
        custom_marker_palette: Sequence of marker names, only read when
            ``custom_marker_boolean`` is truthy.

    Returns:
        A tuple ``(color_list, color_palette, marker_list, marker_palette,
        df, add_legend_to_df)`` where the two ``*_list`` entries are the
        sorted unique values of the respective columns and
        ``add_legend_to_df`` is the function used to add the legend columns,
        so it can be re-applied to other frames.

    Raises:
        SystemExit: With an explanatory message (and therefore a non-zero
            exit status) when a custom list/tuple palette has too few colors,
            when ``custom_color_palette`` has an unsupported type, or when
            there are more unique marker values than markers.
    """

    ######### Colors:

    # One color is needed per unique entry of the color column.
    color_list = sorted(df[color_column].unique().tolist())
    n_colors = len(color_list)

    if custom_color_boolean:
        # The custom palette may be a function, a dict or a list/tuple, so the
        # type decides how the colors are extracted.
        if isinstance(custom_color_palette, types.FunctionType):
            # Probably one of the large Bokeh palette functions; it raises on
            # its own if asked for more colors than it supports.
            color_palette = list(custom_color_palette(n_colors))
        elif isinstance(custom_color_palette, dict):
            # Probably one of the small Bokeh palettes keyed by size; a
            # KeyError is raised if there is no entry for this exact size.
            color_palette = list(custom_color_palette[n_colors])
        elif isinstance(custom_color_palette, (tuple, list)):
            if n_colors > len(custom_color_palette):
                raise SystemExit(
                    'Error - Not enough colors in custom palette to '
                    'assign a unique color to each option.')
            # Copy into a list so it can be altered later (tuples cannot).
            color_palette = list(custom_color_palette)
        else:
            raise SystemExit(
                'Error - Unsuported type of custom_color_palette')
    elif 3 <= n_colors <= 8:
        # Default: the Colorblind palette whenever it has an entry of the
        # right size (Bokeh ships sizes 3 through 8).
        color_palette = list(Colorblind[n_colors])
    else:
        # Otherwise fall back to Turbo, which raises if more than 256 colors
        # are requested.
        color_palette = list(turbo(n_colors))

    ######### Markers:

    # One marker is needed per unique entry of the marker column.
    marker_list = sorted(df[marker_column].unique().tolist())

    if custom_marker_boolean:
        marker_palette = custom_marker_palette
    else:
        # Default order chosen to keep contrast as high as possible as the
        # number of markers grows: the 'better' markers come first and the
        # 'worse' ones are kept for the end where they may never be used.
        marker_palette = [
            'circle', 'square', 'triangle', 'diamond', 'inverted_triangle',
            'hex', 'circle_cross', 'square_cross', 'diamond_cross', 'asterisk',
            'cross', 'x', 'circle_x', 'square_x', 'dash'
        ]

    if len(marker_list) > len(marker_palette):
        raise SystemExit(
            'Error - Not enough markers to assign a unique marker to '
            'each option.')

    ######### Legend Key:

    def add_legend_to_df(df):
        """Add the 'legend', 'color1' and 'marker1' columns to ``df``."""

        def add_legend(row):
            # When one column drives both marker and color, its value alone
            # is the legend key; otherwise combine '<marker>_<color>'.
            if marker_column == color_column:
                return str(row[marker_column])
            return str(row[marker_column]) + '_' + str(row[color_column])

        df.loc[:, 'legend'] = df.apply(add_legend, axis=1)
        df.loc[:, 'color1'] = df.loc[:, color_column]
        df.loc[:, 'marker1'] = df.loc[:, marker_column]
        return df

    df = add_legend_to_df(df)

    return (color_list, color_palette, marker_list, marker_palette, df,
            add_legend_to_df)
예제 #17
0
def _save_png_and_html(plot, stem):
    """Write ``plot`` to the 'web' output as ``<stem>.png`` and ``<stem>.html``."""
    with hl.hadoop_open(output_path(f'{stem}.png', 'web'), 'wb') as f:
        get_screenshot_as_png(plot).save(f, format='PNG')
    html = file_html(plot, CDN, 'my plot')
    with hl.hadoop_open(output_path(f'{stem}.html', 'web'), 'w') as f:
        f.write(html)


def _plot_pc_pairs(scores, variance, number_of_pcs, labels, sample_names,
                   title, palette, factors, file_prefix):
    """Save one labelled scatter plot per pair of consecutive PCs.

    For every ``pc1`` in ``range(number_of_pcs - 1)`` the scores of component
    ``pc1`` are plotted against those of ``pc1 + 1``, colored by ``labels``
    through ``factor_cmap(palette, factors)``, and written out as
    ``<file_prefix>_pc<pc1 + 1>.png`` / ``.html``.
    """
    tooltips = [('labels', '@label'), ('samples', '@samples')]
    for pc1 in range(number_of_pcs - 1):
        pc2 = pc1 + 1
        plot = figure(
            title=title,
            # Both axes read "PC<n> (<variance>%)".
            x_axis_label=f'PC{pc1 + 1} ({variance[pc1]}%)',
            y_axis_label=f'PC{pc2 + 1} ({variance[pc2]}%)',
            tooltips=tooltips,
        )
        source = ColumnDataSource(
            dict(
                x=scores.scores[pc1].collect(),
                y=scores.scores[pc2].collect(),
                label=labels,
                samples=sample_names,
            ))
        plot.circle(
            'x',
            'y',
            alpha=0.5,
            source=source,
            size=4,
            color=factor_cmap('label', palette, factors),
            legend_group='label',
        )
        plot.add_layout(plot.legend[0], 'left')
        _save_png_and_html(plot, f'{file_prefix}_pc{pc2}')


def query():
    """Query script entry point.

    Reads the PCA scores, eigenvalues and loadings, then writes PNG and HTML
    plots to the 'web' output: consecutive-PC scatter plots labelled by study,
    by continental population and by subpopulation, followed by one loadings
    plot per PC.
    """

    hl.init(default_reference='GRCh38')

    scores = hl.read_table(SCORES)
    scores = scores.annotate(
        study=hl.if_else(scores.s.contains('TOB'), 'TOB-WGS', 'HGDP-1kG'))
    sample_names = scores.s.collect()

    # Percentage of variance per PC, rounded to two decimals for axis labels.
    eigenvalues = hl.import_table(EIGENVALUES)
    eigenvalues = eigenvalues.to_pandas()
    eigenvalues.columns = ['eigenvalue']
    eigenvalues = pd.to_numeric(eigenvalues.eigenvalue)
    variance = eigenvalues.divide(float(eigenvalues.sum())) * 100
    variance = variance.round(2)

    # Get number of PCs
    number_of_pcs = len(eigenvalues)

    # plot by study
    labels = scores.study.collect()
    # Factors are sorted so the label -> color assignment is the same on
    # every run (plain set iteration order is not stable across runs).
    study = sorted(set(labels))
    _plot_pc_pairs(scores, variance, number_of_pcs, labels, sample_names,
                   title='Study',
                   palette=['#1b9e77', '#d95f02'],
                   factors=study,
                   file_prefix='study')

    # plot by continental population
    hgdp1kg_tobwgs = hl.read_matrix_table(HGDP1KG_TOBWGS)
    scores = scores.annotate(continental_pop=hgdp1kg_tobwgs.cols()[
        scores.s].hgdp_1kg_metadata.population_inference.pop)
    labels = scores.continental_pop.collect()
    # Samples without a population annotation are labelled 'TOB-NFE'
    labels = ['TOB-NFE' if x is None else x for x in labels]
    continental_population = sorted(set(labels))
    _plot_pc_pairs(scores, variance, number_of_pcs, labels, sample_names,
                   title='Continental Population',
                   palette=turbo(len(continental_population)),
                   factors=continental_population,
                   file_prefix='continental_pop')

    # plot by subpopulation
    scores = scores.annotate(subpop=hgdp1kg_tobwgs.cols()[
        scores.s].hgdp_1kg_metadata.labeled_subpop)
    labels = scores.subpop.collect()
    labels = ['TOB-NFE' if x is None else x for x in labels]
    sub_population = sorted(set(labels))
    _plot_pc_pairs(scores, variance, number_of_pcs, labels, sample_names,
                   title='Subpopulation',
                   palette=turbo(len(sub_population)),
                   factors=sub_population,
                   file_prefix='subpop')

    # Plot loadings
    loadings_ht = hl.read_table(LOADINGS)
    for i in range(number_of_pcs):
        pc = i + 1
        plot = manhattan_loadings(
            pvals=hl.abs(loadings_ht.loadings[i]),
            locus=loadings_ht.locus,
            title='Loadings of PC ' + str(pc),
            collect_all=True,
        )
        _save_png_and_html(plot, f'loadings_pc{pc}')
예제 #18
0
def _bokeh_timeseries_plots(varnames,
                            time_units,
                            var_units,
                            phase_names,
                            phases_node_path,
                            last_solution_case,
                            last_simulation_case,
                            plot_dir_path,
                            num_cols=2,
                            bg_fill_color='#282828',
                            grid_line_color='#666666',
                            open_browser=False):
    """Plot every timeseries variable for every phase in a Bokeh grid.

    One figure is created per entry of ``varnames``. In each figure the
    solution values of each phase are drawn as circles and, when a simulation
    case is given, the simulated values as lines, one color per phase. A
    separate dummy figure carries a single legend for the whole grid.

    Parameters
    ----------
    varnames : iterable of str
        Names of the timeseries variables to plot.
    time_units : mapping
        Indexed by variable name; used in the x-axis label.
    var_units : mapping
        Indexed by variable name; used in the y-axis label.
    phase_names : sequence of str
        Names of the phases to plot.
    phases_node_path : str
        Prefix placed before ``<phase>.timeseries...`` when building output
        names. When falsy, no prefix is used for the solution case and
        ``'sim_traj.'`` is prepended for the simulation case.
    last_solution_case : object
        Case whose ``outputs`` mapping holds the solution values.
    last_simulation_case : object
        Case whose ``outputs`` mapping holds the simulation values; when
        falsy, no simulation lines are drawn.
    plot_dir_path : str
        Directory in which ``plots.html`` is written when not in notebook
        mode.
    num_cols : int
        Number of columns in the grid of figures.
    bg_fill_color : str
        Background fill color of every figure.
    grid_line_color : str
        Color of the grid lines of the variable figures.
    open_browser : bool
        When true (or in notebook mode) the layout is shown; otherwise it is
        only saved.
    """
    from bokeh.io import output_notebook, output_file, save, show
    from bokeh.layouts import gridplot, column
    from bokeh.models import Legend
    from bokeh.plotting import figure
    import bokeh.palettes as bp

    if dymos_options['notebook_mode']:
        output_notebook()
    else:
        output_file(os.path.join(plot_dir_path, 'plots.html'))

    # Prune the edges from the color map
    cmap = bp.turbo(len(phase_names) + 2)[1:-1]
    figures = []
    colors = {}

    # Get the minimum and maximum times in any phase, so when we plot a variable that only exists
    # in a few phases, it is plotted against the entire time range.
    min_time = 1.0E21
    max_time = -1.0E21

    for iphase, phase_name in enumerate(phase_names):
        if phases_node_path:
            time_name = f'{phases_node_path}.{phase_name}.timeseries.time'
        else:
            time_name = f'{phase_name}.timeseries.time'
        min_time = min(min_time, np.min(last_solution_case.outputs[time_name]))
        max_time = max(max_time, np.max(last_solution_case.outputs[time_name]))
        colors[phase_name] = cmap[iphase]

    # The x-range padding is the same for every figure.
    padding = 0.05 * (max_time - min_time)

    for var_name in varnames:
        # Get the labels
        time_label = f'time ({time_units[var_name]})'
        var_label = f'{var_name} ({var_units[var_name]})'
        title = f'timeseries.{var_name}'

        # add labels, title, and legend
        fig = figure(title=title,
                     background_fill_color=bg_fill_color,
                     x_range=(min_time - padding, max_time + padding),
                     plot_width=180,
                     plot_height=180)
        fig.xaxis.axis_label = time_label
        fig.yaxis.axis_label = var_label
        fig.xgrid.grid_line_color = grid_line_color
        fig.ygrid.grid_line_color = grid_line_color

        # Plot each phase
        for iphase, phase_name in enumerate(phase_names):
            phase_color = cmap[iphase]

            if phases_node_path:
                var_name_full = f'{phases_node_path}.{phase_name}.timeseries.{var_name}'
                time_name = f'{phases_node_path}.{phase_name}.timeseries.time'
            else:
                var_name_full = f'{phase_name}.timeseries.{var_name}'
                time_name = f'{phase_name}.timeseries.time'

            # Skip phases in which this variable does not exist.
            if var_name_full not in last_solution_case.outputs:
                continue

            var_val = last_solution_case.outputs[var_name_full]
            time_val = last_solution_case.outputs[time_name]

            # Draw one series per element of the variable (every index over
            # the trailing dimensions). The leading slice is combined with
            # the element index into a single index tuple so that exactly one
            # element is picked per time point; indexing with ``[:, idxs]``
            # would instead treat the tuple as a list of positions on axis 1.
            for idxs in np.ndindex(var_val.shape[1:]):
                var_val_i = var_val[(slice(None),) + idxs].ravel()
                fig.circle(time_val.ravel(),
                           var_val_i,
                           size=5,
                           color=phase_color,
                           name='sol:' + phase_name)

            # get simulation values, if plotting simulation
            if last_simulation_case:
                # if the phases_node_path is empty, need to pre-pend names with "sim_traj."
                #   as that is pre-pended in Trajectory.simulate code
                sim_prefix = '' if phases_node_path else 'sim_traj.'
                var_val_simulate = last_simulation_case.outputs[sim_prefix +
                                                                var_name_full]
                time_val_simulate = last_simulation_case.outputs[sim_prefix +
                                                                 time_name]
                for idxs in np.ndindex(var_val_simulate.shape[1:]):
                    var_val_i = var_val_simulate[(slice(None),) + idxs].ravel()
                    fig.line(time_val_simulate.ravel(),
                             var_val_i,
                             line_dash='solid',
                             line_width=0.5,
                             color=phase_color,
                             name='sim:' + phase_name)
        figures.append(fig)

    # Implement a single legend for all figures using the example here:
    # https://stackoverflow.com/a/56825812/754536

    # ## Use a dummy figure for the LEGEND
    dum_fig = figure(outline_line_alpha=0,
                     toolbar_location=None,
                     background_fill_color=bg_fill_color,
                     plot_width=250,
                     max_width=250)

    # set the components of the figure invisible
    for fig_component in [
            dum_fig.grid, dum_fig.ygrid, dum_fig.xaxis, dum_fig.yaxis
    ]:
        fig_component.visible = False

    # The glyphs referred by the legend need to be present in the figure that holds the legend,
    # so we must add them to the figure renderers.
    sol_legend_items = [(phase_name + ' solution', [
        dum_fig.circle([0], [0],
                       size=5,
                       color=colors[phase_name],
                       tags=['sol:' + phase_name])
    ]) for phase_name in phase_names]
    sim_legend_items = [(phase_name + ' simulation', [
        dum_fig.line([0], [0],
                     line_dash='solid',
                     line_width=0.5,
                     color=colors[phase_name],
                     tags=['sim:' + phase_name])
    ]) for phase_name in phase_names]
    # Interleave the entries: solution then simulation for each phase.
    legend_items = [
        item for pair in zip(sol_legend_items, sim_legend_items)
        for item in pair
    ]

    # # set the figure range outside of the range of all glyphs
    dum_fig.x_range.end = 1005
    dum_fig.x_range.start = 1000

    legend = Legend(click_policy='hide',
                    location='top_left',
                    border_line_alpha=0,
                    items=legend_items,
                    background_fill_alpha=0.0,
                    label_text_color='white',
                    label_width=120,
                    spacing=10)

    dum_fig.add_layout(legend, place='center')

    gd = gridplot(figures, ncols=num_cols, sizing_mode='scale_both')

    plots = gridplot([[gd, column(dum_fig, sizing_mode='stretch_height')]],
                     toolbar_location=None,
                     sizing_mode='scale_both')

    if dymos_options['notebook_mode'] or open_browser:
        show(plots)
    else:
        save(plots)
예제 #19
0
def main(number_of_pcs: int):  # pylint: disable=too-many-locals
    """Query script entry point.

    Shows scatter plots of consecutive principal components, labelled in turn
    by study membership (sample name contains 'TOB'), by continental
    population and by subpopulation.

    :param number_of_pcs: number of consecutive-PC plots made per labelling;
        plot ``i`` shows component ``i`` against component ``i + 1``.
    """

    hl.init()

    mt = hl.read_matrix_table(HGDP1KG_TOBWGS)
    scores = hl.read_table(SCORES)
    mt = mt.annotate_cols(scores=scores[mt.s].scores)
    mt = mt.annotate_cols(TOB_WGS=mt.s.contains('TOB'))

    # PCA plot must all come from the same object
    columns = mt.cols()
    pca_scores = columns.scores
    labels = columns.TOB_WGS

    # get percent variance explained
    eigenvalues = pd.read_csv(EIGENVALUES)
    eigenvalues.columns = ['eigenvalue']
    # Sum the column itself so the total is a scalar; summing the whole frame
    # gives a one-element Series, and calling float() on that is deprecated.
    total = float(eigenvalues['eigenvalue'].sum())
    variance = eigenvalues['eigenvalue'].divide(total) * 100
    variance = variance.round(2)

    # NOTE(review): the last iteration of each loop below reads component and
    # variance index ``number_of_pcs`` (pc2 = i + 1) - confirm that the scores
    # and eigenvalues hold at least ``number_of_pcs + 1`` components.
    print('Making PCA plots labelled by the study ID')
    for i in range(number_of_pcs):
        pc1 = i
        pc2 = i + 1
        print(f'PC{pc1 + 1} vs PC{pc2 + 1}')
        p = hl.plot.scatter(
            pca_scores[pc1],
            pca_scores[pc2],
            label=labels,
            title='TOB-WGS',
            xlabel=f'PC{pc1 + 1} ({variance[pc1]}%)',
            ylabel=f'PC{pc2 + 1} ({variance[pc2]}%)',
        )
        show(p)

    print('Making PCA plots labelled by the continental population')
    labels = columns.hgdp_1kg_metadata.population_inference.pop
    pops = list(set(labels.collect()))
    hover_fields = {'s': columns.s}

    for i in range(number_of_pcs):
        pc1 = i
        pc2 = i + 1
        print(f'PC{pc1 + 1} vs PC{pc2 + 1}')
        p = hl.plot.scatter(
            pca_scores[pc1],
            pca_scores[pc2],
            label=labels,
            title='Continental Population',
            xlabel=f'PC{pc1 + 1} ({variance[pc1]}%)',
            ylabel=f'PC{pc2 + 1} ({variance[pc2]}%)',
            collect_all=True,
            colors=CategoricalColorMapper(palette=turbo(len(pops)),
                                          factors=pops),
            hover_fields=hover_fields,
        )
        show(p)

    print('Making PCA plots labelled by the subpopulation')
    labels = columns.hgdp_1kg_metadata.labeled_subpop
    pops = list(set(labels.collect()))

    for i in range(number_of_pcs):
        pc1 = i
        pc2 = i + 1
        print(f'PC{pc1 + 1} vs PC{pc2 + 1}')
        p = hl.plot.scatter(
            pca_scores[pc1],
            pca_scores[pc2],
            label=labels,
            title='Sub-Population',
            xlabel=f'PC{pc1 + 1} ({variance[pc1]}%)',
            ylabel=f'PC{pc2 + 1} ({variance[pc2]}%)',
            collect_all=True,
            colors=CategoricalColorMapper(palette=turbo(len(pops)),
                                          factors=pops),
        )
        show(p)