Example 1
def histogram_month(filename, metrics, key, title_str, ylabel):
    bkh.reset_output()
    bkh.output_file(filename, title=filename)
    data_months = {
        'index': metrics['A'][key].index,
        metrics['A']['name']: metrics['A'][key].frequency,
        metrics['B']['name']: metrics['B'][key].frequency
    }
    fig = bkh.figure(x_axis_type='datetime',
                     title=title_str,
                     width=720,
                     height=480)
    fig.vbar(x='index',
             top='frequency',
             width=timedelta(days=10),
             source=metrics['A'][key],
             color=colors[0],
             legend=metrics['A']['name'])
    fig.vbar(x='index',
             top='frequency',
             width=timedelta(days=10),
             source=metrics['B'][key],
             color=colors[1],
             legend=metrics['B']['name'])
    fig.xaxis.axis_label = 'Date'
    fig.yaxis.axis_label = ylabel
    bkh.show(fig)
    return
Example 2
def plot_comp_ave( x_axi, wholedata, trainPredict, trainPredict_2, testPredict, 
	startpoint, test_num, ave_len, title='plot', path=None, filename='plot' ):

	the_dir = 'plot' if path is None else str(path)

	if not os.path.isdir(the_dir):
		os.mkdir(the_dir)

	output_file( the_dir + '/' + str( filename ) + '.html' )
	
	p = figure(title=title, x_axis_label='Time', y_axis_label='Load', x_axis_type="datetime", plot_width=1200)
	p.line( x_axi[ : -ave_len+1 ], wholedata[:,0] )

	test_pred_start = test_num[0] + startpoint
	train_2_start = test_num[1] + startpoint

	if trainPredict.shape[ -1 ] == 1:
		p.line( x_axi[ startpoint : startpoint+len( trainPredict ) ], trainPredict[:,0], color="#B3DE69" )
		p.line( x_axi[ test_pred_start : test_pred_start+len( testPredict ) ], testPredict[:,0], color="#CAB2D6" )
		p.line( x_axi[ train_2_start : train_2_start+len( trainPredict_2 ) ], trainPredict_2[:,0], color="#B3DE69" )
	else:
		p.line( x_axi[ startpoint : startpoint+len( trainPredict ) ], trainPredict, color="#B3DE69" )
		p.line( x_axi[ test_pred_start : test_pred_start+len( testPredict ) ], testPredict, color="#CAB2D6" )
		p.line( x_axi[ train_2_start : train_2_start+len( trainPredict_2 ) ], trainPredict_2, color="#B3DE69" )

	show(p)
	reset_output()
Example 4
def variable_correction_plots(station, dt_array, var_one, corr_var_one, var_two, corr_var_two, code, folder_path):
    x_size = 800
    y_size = 350
    reset_output()  # clears bokeh output, prevents ballooning file sizes

    delta_var_one = corr_var_one - var_one
    delta_var_two = corr_var_two - var_two

    with np.errstate(divide='ignore', invalid='ignore'):  # silence divide-by-zero/NaN warnings from the division
        prct_var_one = ((corr_var_one - var_one) / var_one) * 100.0
        prct_var_two = ((corr_var_two - var_two) / var_two) * 100.0

    # Obtain title based on variables passed for file name
    (units, title, var_one_name, var_one_color, var_two_name, var_two_color) = generate_line_plot_features(code, '')
    output_file(folder_path + "/correction_files/" + station + "_" + title + "_correction_plots.html")

    original_plot = line_plot(x_size, y_size, dt_array, var_one, var_two, code, station + ' Original ', link_plot=None)

    corrected_plot = line_plot(x_size, y_size, dt_array, corr_var_one, corr_var_two, code, 'Corrected ',
                               link_plot=original_plot)

    delta_plot = line_plot(x_size, y_size, dt_array, delta_var_one, delta_var_two, code, 'Deltas of ',
                           link_plot=original_plot)

    percent_plot = line_plot(x_size, y_size, dt_array, prct_var_one, prct_var_two, code, '% Difference of ',
                             link_plot=original_plot)

    corr_fig = gridplot([[original_plot], [corrected_plot], [delta_plot], [percent_plot]],
                        toolbar_location="left")
    return corr_fig
Example 5
    def __init__(self,
                 df,
                 params=[],
                 logify=False,
                 output='notebook',
                 notebook_url="http://localhost:8888",
                 **kwargs):
        self.df = df
        self.params = params
        self.logify = logify
        self.kwargs = kwargs

        if output == 'notebook':
            reset_output()
            output_notebook()
            show(self.modify_doc, notebook_url=notebook_url)
        else:
            reset_output()
            server = Server({'/': self.modify_doc})
            server.start()
            server.show("/")
            try:
                server.run_until_shutdown()
            except Exception:
                pass
            self.server = server
Example 6
def plotPreds(prediction, test, outputDir, parametersSet):
    reset_output()
    stocks = test.columns.values
    
    dataTest = test.reset_index()
    output_file(outputDir + '_'.join(parametersSet) + '_predPerf.html')
    colors_list = ['green', 'red']
    
    grid = []
    subGrid = []
    for i, stock in enumerate(sorted(stocks)):
        if i % 3 == 0 and i != 0:
            grid.append(subGrid)
            subGrid = []
        legends_list = [stock, 'reconstruction']
        xs = [dataTest['Date'], dataTest['Date']]
        ys = [dataTest[stock], prediction[stock]]
        
        p = figure(x_axis_type="datetime",
                   y_axis_label = "Log-return")
        for (colr, leg, x, y ) in zip(colors_list, legends_list, xs, ys):
            p.line(x, y, color=colr, legend=leg)
        subGrid.append(p)
    if subGrid:  # flush the final, possibly partial, row
        grid.append(subGrid)
    p = gridplot(grid)
    save(p)
    return True
Example 7
def plot_flare_bokeh(time, flux_pca, flux=None, flux_type="", width_fig=900, height_fig=500, title=""):
    # output form
    reset_output()

    output_notebook()

    TOOLTIPS = [
        ("index", "$index"),
        ("(x,y)", "($x, $y)"),
    ]

    # graph setting
    p = figure(tooltips=TOOLTIPS, title=title, x_axis_label="Time", y_axis_label="Flux",  plot_width = width_fig, plot_height=height_fig)

    # main body for plotting
    if flux is not None:
        p.circle(time, flux, legend="raw (%s)" % flux_type, color="red")
        p.line(time, flux, legend="raw (%s)" % flux_type, color="red")
    p.circle(time, flux_pca, color="black")
    p.line(time, flux_pca, color="black")

    # p.add_layout(p.legend[0], "right")  # move the legend outside the plot (to the right)

    # click a legend entry to hide its glyph
    # p.legend.click_policy = "hide"

    # Output format (svg or png)
    p.output_backend = "svg"
    
    # Show
    show(p)
Example 8
def save_to_html_for_account_code(new_df, filename, account_code):
    reset_output()
    output_file(filename)

    sample = new_df.loc[new_df['account_code'] == account_code]
    source = ColumnDataSource(sample)
    clients = source.data['client_name'].tolist()
    p = figure(x_range=clients)
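    # categorical x-axis: one slot per client name; the vbar below places bars by name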
    p.vbar(x='client_name',
           top='test_prediction',
           source=source,
           width=0.50,
           color='red')
    p.xaxis.major_label_orientation = "vertical"

    p.title.text = 'Bank Marketing Predictions'
    p.yaxis.axis_label = 'Prediction rate'

    hover = HoverTool()
    hover.tooltips = [('Client Name', '@client_name'),
                      ('Account Code', '@account_code'), ('Age', '@age'),
                      ('Campaign', '@campaign'), ('Pdays', '@pdays'),
                      ('Previous', '@previous'),
                      ('Marital status', '@marital_married'),
                      ('Target', '@test_prediction')]
    p.add_tools(hover)
    save(p)
Example 9
def plotResiduals(residuals, outputDir, parametersSet, who):
    reset_output()
    stocks = residuals.columns.values
    
    res = residuals.reset_index()
    output_file(outputDir + '_'.join(parametersSet)  + '_residuals_' + who + '.html')
    
    grid = []
    subGrid = []
    for i, stock in enumerate(sorted(stocks)):
        if i % 3 == 0 and i != 0:
            grid.append(subGrid)
            subGrid = []
        p1 = figure(title=stock + ' ' + who + ' residuals', background_fill_color="#E8DDCB", x_axis_label='r - r_hat')
        p1.yaxis.visible = False
        p1.legend.location = "top_left"
        hist, edges = np.histogram(res[stock], density=True, bins=25)
        p1.quad(top=hist,
                bottom=0,
                left=edges[:-1],
                right=edges[1:],
                fill_color="#036564",
                line_color="#033649")
        subGrid.append(p1)
    if subGrid:  # flush the final, possibly partial, row
        grid.append(subGrid)
    p = gridplot(grid)
    save(p)
    return True
Example 10
def histogram_weekdays(filename, metrics):
    bkh.reset_output()
    bkh.output_file(filename, title=filename)
    weekdays = [
        'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday',
        'Sunday'
    ]
    fig = bkh.figure(x_range=weekdays,
                     title='Message distribution over weekdays',
                     width=720,
                     height=480)
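    # dodge() shifts each series within its categorical slot so the two bars sit side by side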
    fig.vbar(x=dodge('index', 0.35, range=fig.x_range),
             top='frequency',
             width=0.3,
             source=metrics['A']['frame_weekdays'],
             color=colors[0],
             legend=metrics['A']['name'])
    fig.vbar(x=dodge('index', 0.65, range=fig.x_range),
             top='frequency',
             width=0.3,
             source=metrics['B']['frame_weekdays'],
             color=colors[1],
             legend=metrics['B']['name'])
    fig.xaxis.axis_label = 'Weekday'
    fig.yaxis.axis_label = 'Message count'
    bkh.show(fig)
    return
Example 11
def histogram_hourofday(filename, metrics, key, title_str, ylabel):
    bkh.reset_output()
    bkh.output_file(filename, title=filename)
    hours = [
        '00:00', '01:00', '02:00', '03:00', '04:00', '05:00', '06:00', '07:00',
        '08:00', '09:00', '10:00', '11:00', '12:00', '13:00', '14:00', '15:00',
        '16:00', '17:00', '18:00', '19:00', '20:00', '21:00', '22:00', '23:00'
    ]
    fig = bkh.figure(x_range=hours, title=title_str, width=1280, height=480)
    fig.vbar(x=dodge('index', 0.35, range=fig.x_range),
             top='frequency',
             width=0.3,
             source=metrics['A'][key],
             color=colors[0],
             legend=metrics['A']['name'])
    fig.vbar(x=dodge('index', 0.65, range=fig.x_range),
             top='frequency',
             width=0.3,
             source=metrics['B'][key],
             color=colors[1],
             legend=metrics['B']['name'])
    fig.xaxis.axis_label = 'Time'
    fig.yaxis.axis_label = ylabel
    bkh.show(fig)
    return
Example 12
    def __init__(self,
                 df,
                 params=[],
                 trim_factor=1,
                 logify=False,
                 output='notebook',
                 port=5006,
                 notebook_url="http://localhost:8888",
                 **kwargs):
        self.df = df.iloc[::trim_factor].reset_index(drop=True)
        self.params = params
        self.logify = logify
        self.kwargs = kwargs

        if output == 'notebook':
            reset_output()
            output_notebook()
            show(self.modify_doc, notebook_url=notebook_url)
        elif output == 'server':
            reset_output()
            server = Server({'/': self.modify_doc}, port=port)
            server.start()
            try:
                server.run_until_shutdown()
            except:
                print("Server already running")
            self.server = server
Example 13
def bokeh_simple_barchart(absciss,
                          ordinate,
                          title,
                          graph_dir,
                          graph_name,
                          dump_jpg,
                          show_html,
                          width=1800,
                          height=800):
    """ Dump simple bokeh barchart with single categorical value """
    """ create an output graphics file """
    output_file(os.path.join(graph_dir, graph_name + ".html"))

    p = figure(x_range=absciss,
               plot_height=height,
               plot_width=width,
               title=title,
               toolbar_location=None,
               tools="")

    p.vbar(x=absciss, top=ordinate, width=0.9)

    p.xgrid.grid_line_color = None
    p.y_range.start = 0
    # show figure in browser
    if show_html:
        show(p)
    # dump figure as png file
    if dump_jpg:
        export_png(p, filename=os.path.join(graph_dir, graph_name + ".png"))

    reset_output()
Example 14
 def showScatterPlot(self):
     """
     scatter plot visualization
     """
     reset_output()
     output_file('showme2.html')
     show(self.createScatterPlot())
Example 15
    def test_heatmap_recipe(self):
        ar_downsample._loadAR()
        reset_output()
        sess = Session(client=app.test_client())
        output_server('Census', session=sess)
        source = ServerDataSource(expr={
            'op': 'Field',
            'args': [':leaf', 'bivariate']
        })
        plot = figure(plot_width=600, plot_height=400, title="Test Title")
        plot.square('A', 'B', source=source)

        plot2 = ar_downsample.heatmap(plot,
                                      palette="Reds9",
                                      reserve_val=0,
                                      points=True,
                                      client_color=True,
                                      title="Test Title 2")
        source2 = self._find_source(plot2)

        self.assertEquals("Test Title 2", plot2.title)
        self.assertEquals(type(source2), ServerDataSource)

        transform = source2.transform
        self.assertEquals(type(transform['info']), ar_downsample.Const)
        self.assertEquals(type(transform['agg']), ar_downsample.Count)
        self.assertEquals(type(transform['shader']), ar_downsample.Seq)
        self.assertEquals(transform['shader'].out, "image")
Example 16
def histogram_month_chars(filename, metrics):
    bkh.reset_output()
    bkh.output_file(filename, title=filename)
    data_months = {
        'index': metrics['A']['frame_months_chars'].index,
        metrics['A']['name']: metrics['A']['frame_months_chars'].frequency,
        metrics['B']['name']: metrics['B']['frame_months_chars'].frequency
    }
    fig = bkh.figure(x_axis_type='datetime',
                     title='Monthly character count over time per person',
                     width=720,
                     height=480)
    fig.vbar(x='index',
             top='frequency',
             width=timedelta(days=10),
             source=metrics['A']['frame_months_chars'],
             color=colors[0],
             legend=metrics['A']['name'])
    fig.vbar(x='index',
             top='frequency',
             width=timedelta(days=10),
             source=metrics['B']['frame_months_chars'],
             color=colors[1],
             legend=metrics['B']['name'])
    fig.xaxis.axis_label = 'Date'
    fig.yaxis.axis_label = 'Number of characters'
    bkh.show(fig)
    return
Example 17
def plot_file(file, path, plot_dir):

    dt = pd.read_csv(os.path.join(path, file))

    axis_datetime = np.array(dt['Time'], dtype=np.datetime64)

    col_list = [c for c in dt.columns.values if c != 'Time']

    if not os.path.isdir(plot_dir):
        os.mkdir(plot_dir)

    full_path = os.path.join(plot_dir, file.split('.')[0] + '.html')

    output_file(full_path)

    plot_list = []

    for c in col_list:
        p = figure(title=c,
                   x_axis_label='Time',
                   y_axis_label=c,
                   x_axis_type="datetime",
                   plot_width=1200)
        p.line(axis_datetime, dt[c])
        plot_list.append(p)

    p = column(plot_list)

    show(p)

    reset_output()
Example 19
def plot_heart_rate_variability(apple_watch):
    """
    Generate swarm-like plots of heart rate variability measures for multiple days

    :param apple_watch: data frame of heart rate variability data
    :return: None
    """
    logger.info('Loading and Plotting Heart Rate Variability Data')
    df = apple_watch.load_heart_rate_variability_data()
    df = df[(df['start_timestamp'] > START_DATE)
            & (df['start_timestamp'] < END_DATE)]
    df['date'] = list(
        map(lambda d: d.strftime('%m/%d/%y'), df['start_timestamp']))
    df['time'] = list(
        map(lambda d: d.strftime('%H:%M:%S'), df['start_timestamp']))
    dates = list(df['date'].unique())

    # remove instantaneous data; bokeh doesn't like the nested dictionary format
    del df['instantaneous_bpm']
    source = ColumnDataSource(df)
    plot = figure(width=800,
                  height=600,
                  x_range=dates,
                  x_axis_label='Date',
                  y_axis_label='Time Between Heart Beats (ms)',
                  title='Apple Watch Heart Rate Variability (SDNN)',
                  tools='pan, wheel_zoom, box_zoom, reset, hover',
                  toolbar_location='above',
                  sizing_mode='scale_both')

    # add color map for dates
    dates_cmap = factor_cmap('date', palette=Category20_20, factors=dates)

    plot.circle(x='date',
                y='heart_rate_variability',
                source=source,
                size=12,
                fill_color=dates_cmap)
    plot.xaxis.axis_label_text_font_size = "14pt"
    plot.xaxis.major_label_text_font_size = "12pt"
    plot.yaxis.axis_label_text_font_size = "14pt"
    plot.yaxis.major_label_text_font_size = "12pt"
    plot.title.text_font_size = '16pt'

    # configure hover tool
    plot.select_one(HoverTool).tooltips = [
        ('date', '@date'), ('time', '@time'),
        ('time interval', '@heart_rate_variability')
    ]

    if SHOW_PLOTS:
        show(plot, browser='chrome')
    save_plot(plot, 'heart_rate_variability')
    # clear output mode for next plot
    reset_output()

    # save dataframe
    df.to_csv('apple_watch_data/heart_rate_variability.csv', index=False)
Example 20
def readTempData():
    attacks = ['190C', '200C', '210C', '220C', '230C']
    # Create a dictionary to store each figure in
    p = {}
    legendColors = ['navy', 'olive', 'firebrick', 'orange', 'purple']
    titles = [
        'Lowered to 190C', 'Lowered to 200C', 'Control - 210C',
        'Raised to 220C', 'Raised to 230C'
    ]
    i = 1
    # Iterate through all 5 temperature attacks (190C-230C)
    for x in attacks:
        # Format the Bokeh plots for the temperature graphs
        p[x] = figure(width=900,
                      plot_height=600,
                      title=titles[i - 1],
                      x_axis_label='Strain',
                      y_axis_label='Stress (MPa)',
                      x_range=(0, 0.0225),
                      y_range=(0, 34))
        output_file("Attack_5_" + x + ".html")
        p[x].title.text_font = 'Segoe UI'
        p[x].title.text_font_size = '26pt'
        p[x].title.align = 'center'
        p[x].xaxis.axis_label_text_font_size = '26pt'
        p[x].xaxis.major_label_text_font_size = '24pt'
        p[x].yaxis.axis_label_text_font_size = '26pt'
        p[x].yaxis.major_label_text_font_size = '24pt'
        p[x].min_border = 35
        # Load the mat file for each temperature
        mat = sio.loadmat('Attack_5_' + x + '.mat')
        # Iterate through all five specimens
        for specimen in range(5):
            # print("Specmimen:",specimen+1)
            # Assign values for stress and strain from the MAT file
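            # loadmat wraps MATLAB structs in nested object arrays, hence the [0][0] indexing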
            stress = mat['Temp_Test_Batch_' + str(i) + '_' +
                         x]['stress'][0][0][:, specimen]
            strain = mat['Temp_Test_Batch_' + str(i) + '_' +
                         x]['strain'][0][0][:, specimen]
            p[x].line(strain,
                      stress,
                      legend=None,
                      line_width=1,
                      line_color=legendColors[specimen])
        i = i + 1
        # Write output files
        outputWrite = 'Plot_' + x + '.png'
        # export_png(p,filename=outputWrite)
        print("Finished Writing File: " + outputWrite)
        reset_output()
    # Debugging
    # l=gridplot([[p['190C']],[p['200C']],[p['210C']],[p['220C']],[p['230C']]])   # Vertical
    l = gridplot([[p['190C'], p['200C']], [p['210C'], None],
                  [p['220C'], p['230C']]])  # Horizontal
    # export_png(l,filename='Gridplot.png')
    show(l)
Example 21
 def test_reset_output(self):
     plt._default_document = 10
     plt._default_session = 10
     plt._default_file = 10
     plt._default_notebook = 10
     plt.reset_output()
     self.assertTrue(isinstance(plt._default_document, plt.Document))
     self.assertEqual(plt._default_session, None)
     self.assertEqual(plt._default_file, None)
     self.assertEqual(plt._default_notebook, None)
Example 23
def readFirstFourAttacks():
    attacks = [
        'SHD_XY', 'Solid_XY', 'Solid_XZ', 'Solid_XZ_with_Notch',
        'Solid_XZ_with_Seam'
    ]
    # Create a dictionary to store each figure
    f = {}
    legendColors = ['navy', 'olive', 'firebrick', 'orange', 'purple', 'red']
    titles = [
        'Density Adjustment', 'Orientation Change', 'Control Specimen',
        'Notch Insertion', 'Seam Placement'
    ]
    i = 1
    # Load the mat file containing all tensile test data
    mat = sio.loadmat('Tensile_Test_Data.mat')
    # Iterate through attacks
    for x in attacks:
        # Format the Bokeh plots for the temperature graphs
        f[x] = figure(width=900,
                      plot_height=600,
                      title=titles[i - 1] + ' ASTM D638 Results',
                      x_axis_label='Strain',
                      y_axis_label='Stress',
                      x_range=(0, 0.0825),
                      y_range=(0, 34))
        output_file("Attack_" + str(i) + "_" + x + ".html")
        f[x].title.text_font = 'Segoe UI'
        f[x].title.text_font_size = '26pt'
        f[x].title.align = 'center'
        f[x].xaxis.axis_label_text_font_size = '26pt'
        f[x].xaxis.major_label_text_font_size = '24pt'
        f[x].yaxis.axis_label_text_font_size = '26pt'
        f[x].yaxis.major_label_text_font_size = '24pt'
        nums = np.array([[0, 1, 2, 3, 4, 5], [None, 1, 2, 3, 4, 5],
                         [0, 1, 2, 3, 4, 5], [0, 1, 2, 4, 5, None],
                         [0, 1, 2, 3, 4, 5]])
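        # None entries mark specimens that are skipped below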
        # Iterate through the specimens recorded for this attack
        for specimen in nums[i - 1, :]:
            if specimen is not None:
                # Assign values for stress and strain from the MAT file
                stress = mat[x]['stress'][0][0][:, specimen]
                strain = mat[x]['strain'][0][0][:, specimen]
                f[x].line(strain,
                          stress,
                          legend=None,
                          line_width=1,
                          line_color=legendColors[specimen])
        i = i + 1
        # Write output files
        outputWrite = 'Plot_' + x + '.png'
        # export_png(p,filename=outputWrite)
        print("Finished Writing File: " + outputWrite)
        reset_output()
        # Debugging
        show(f[x])
Example 24
    def show_figures(self, figures=None, sizing_mode=None, toolbar_location='above', ncols=None, plot_width=None, plot_height=None, toolbar_options=None, merge_tools=True, notebook=True, doc=None, notebook_url='http://localhost:8888', **kwargs):
        """
        Display the given figures in a grid, either inline in a Jupyter Notebook or in a browser.

        Parameters
        ----------
        figures: List (default: None)
            An array of Canvases to display in a grid, given as a list of lists of bokeh.plotting.figure objects. 
            If None, the instance's Canvas, along with its created widgets will be selected.
        sizing_mode: str (default: None)
            How the component should size itself. (allowed values: 'fixed', 'stretch_width', 'stretch_height', 'stretch_both', 'scale_width', 'scale_height', 'scale_both')
        toolbar_location: str (default: 'above')
            Where will the Bokeh Toolbar be located w.r.t. the Canvas (allowed values: 'above', 'below', 'left', 'right')
        ncols: int (default: None)
            Specify the number of columns you would like in your grid. You must only pass an un-nested list of plots (as opposed to a list of lists of plots) when using ncols.        
        plot_width: int (default: None)
            The width you would like all your plots to be. If None the dimensions are automatically calculated.
        plot_height: int (default: None)
            The height you would like all your plots to be. If None the dimensions are automatically calculated.
        toolbar_options: Dict (default: None)
            A dictionary of options that will be used to construct the grid’s toolbar (an instance of ToolbarBox). If none is supplied, ToolbarBox’s defaults will be used.
        merge_tools: boolean (default: True)
            Combine tools from all child plots into a single toolbar.        
        notebook: boolean (default: True)
            Output either at a Jupyter Notebook (True) or at a Browser via Python Script/Local Bokeh Server (False)
        doc: ```bokeh.io.curdoc``` instance (default: None)
            The basic foundation Bokeh uses to render the canvas (along with its widgets).
        notebook_url: str (default: 'http://localhost:8888')
            The IP address of the Jupyter Notebook. 
        **kwargs: Dict
            Other parameters related to the Canvas' output (in case the output is a Jupyter Notebook)
        """
        grid = None

        try:
            if figures is None:
                if len(self.widgets) != 0:
                    figures = [[column(*self.widgets)],[self.figure]]
                else:
                    figures = [[self.figure]]

            grid = bokeh.layouts.gridplot(figures, sizing_mode=sizing_mode, toolbar_location=toolbar_location, ncols=ncols, plot_width=plot_width, plot_height=plot_height, toolbar_options=toolbar_options, merge_tools=merge_tools)
        except TypeError as e:
            print(f'{e}. You must either: \n \t* Pass \'figures\' as a nested list of figures and leave ncols = None; or\n \t* Pass \'figures\' as a list and a non-None value to \'ncols\'.')
            
        def bokeh_app(doc):
            doc.add_root(grid)

        if notebook: 
            reset_output()
            output_notebook(**kwargs)
            show(bokeh_app, notebook_url=notebook_url)
        else:
            bokeh_app(bokeh_io.curdoc() if doc is None else doc)
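    # A minimal usage sketch (hypothetical `canvas` instance of this class):
    #   canvas.show_figures(ncols=2, sizing_mode='scale_width')           # notebook grid
    #   canvas.show_figures(figures=[[fig_a, fig_b]], notebook=False)     # browser output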
Example 25
def histogram_days(filename, frame, name, color):
    bkh.reset_output()
    bkh.output_file(filename, title=filename)
    fig = bkh.figure(x_axis_type='datetime',
                     title='Message count per day of ' + name,
                     width=720,
                     height=480)
    fig.line(frame.index, frame.frequency, color=color, line_width=3)
    fig.xaxis.axis_label = 'Date'
    fig.yaxis.axis_label = 'Frequency'
    bkh.show(fig)
    return
Example 26
def discrete_charts(data, cols, plot_color):
    from bokeh.charts import Bar, output_file, show
    from bokeh.io import output_notebook
    from bokeh.plotting import figure
    from bokeh.layouts import gridplot
    from bokeh.layouts import column, row
    from bokeh.plotting import reset_output
    from bokeh.charts.attributes import cat
    from collections import Counter
    from IPython.display import display
    col_dict = {}

    for col in cols:
        col_dict[col] = pd.DataFrame.from_dict(Counter(data[col]),
                                               orient='index')
        col_dict[col].columns = [col]

    row_coll = []
    rows = []
    block = True

    for col in cols:
        block = True
        # build a human-readable title from the snake_case column name
        column_title = " ".join(piece.title() for piece in col.split('_'))

        p = Bar(col_dict[col],
                values=col,
                title=column_title + ' Stats',
                color=plot_color,
                plot_width=300,
                plot_height=200,
                ylabel="",
                legend=None,
                toolbar_location=None)
        rows.append(p)
        if len(rows) == 3:
            block = False
            row_coll.append(list(rows))
            reset_output()
            rows = []

    if block:
        row_coll.append(list(rows))

    # print(row_coll)
    grid = gridplot(row_coll)  # avoid shadowing the imported gridplot function
    output_notebook()
    show(grid)
Example 27
def output_image_wall(imgs, output_path, title, ids, info, info_name):
    dim = find_sqrt_root_roof(len(imgs))
    img_dimension = imgs[0].shape[0]
    p = figure(title=title,
               width=dim * img_dimension,
               height=dim * img_dimension,
               tooltips=[('x,y', '@xs, @ys'), ('id', '@ids'),
                         (info_name, '@info')])
    p.x_range.range_padding = p.y_range.range_padding = 0

    rgba_images = list()
    xs = list()
    ys = list()

    for i, img in enumerate(imgs):
        # append a fully opaque alpha channel so the image can be drawn as RGBA
        rgba = np.dstack((img, np.full(img.shape[:-1], 255, dtype=np.uint8)))
        rgba_images.append(rgba)
        xs.append(i % dim)
        ys.append(i // dim)

    data = dict(
        images=rgba_images,
        xs=xs,
        ys=ys,
        ids=ids,
        info=info,
    )

    p.image_rgba('images', source=data, x='xs', y='ys', dw=1, dh=1)

    additional_p = figure(title="Corresponding jpeg compress ratio",
                          width=dim * img_dimension,
                          height=dim * img_dimension,
                          tooltips=[('x,y', '@xs, @ys'), ('id', '@ids'),
                                    (info_name, '@info')])
    additional_p.x_range.range_padding = additional_p.y_range.range_padding = 0
    additional_p.rect(source=data,
                      x='xs',
                      y='ys',
                      width=1,
                      height=1,
                      color='black',
                      hover_line_color='black',
                      line_color=None,
                      alpha='info')

    output_file(output_path, title=title)
    output_column = column([p, additional_p])
    show(output_column)
    reset_output()
Example 28
def plot_score( trainScore, TestScore, path=None, filename='plot', title='plot' ):

	the_dir = 'plot' if path is None else str(path)
	output_file( the_dir + '/' + str( filename ) + '.html' )
	
	x_L = list( range( len( trainScore ) ) )
	
	p = figure(title=title, x_axis_label='time', y_axis_label='score', plot_width=1200)
	p.line( x_L, trainScore, color='#B3DE69', legend='train score' )
	p.line( x_L, TestScore, color='#CAB2D6', legend='test score' )
	p.legend.location = 'bottom_left'
	
	show(p)
	reset_output()
	
Example 29
def plot(df, selected):
    reset_output(state=None)

    p = Figure(title="Past 100 days Of Whatever Stock You Searched",
               x_axis_label="Date",
               x_axis_type='datetime',
               y_axis_label="Price",
               plot_width=800,
               plot_height=680)
    p.title.text_font = "arial"
    p.title.text_font_style = "bold"
    p.title.text_font_size = "12pt"
    p.title.align = "center"
    p.xaxis.axis_label_text_font = "arial"
    p.xaxis.axis_label_text_font_size = "10pt"
    p.xaxis.axis_label_text_font_style = "bold"
    p.yaxis.axis_label_text_font = "arial"
    p.yaxis.axis_label_text_font_size = "10pt"
    p.yaxis.axis_label_text_font_style = "bold"
    p.min_border_left = 0

    df["index"] = pd.to_datetime(df["index"])

    if 'open' in selected:
        p.line(df["index"][0:101],
               df["1. open"][0:101],
               color="navy",
               alpha=0.75,
               legend_label="Open")
    if 'close' in selected:
        p.line(df["index"][0:101],
               df["4. close"][0:101],
               color="green",
               alpha=0.75,
               legend_label="Close")
    if 'high' in selected:
        p.line(df["index"][0:101],
               df["2. high"][0:101],
               color="red",
               alpha=0.75,
               legend_label="High")
    if 'low' in selected:
        p.line(df["index"][0:101],
               df["3. low"][0:101],
               color="purple",
               alpha=0.75,
               legend_label="Low")

    p.legend.location = "bottom_right"
    return p
Example 31
    def plot(self):
        reset_output()
        output_file("result.html")
        TOOLTIPS = [
            ("index", "$index"),
            ("(x,y)", "($x, $y)"),
        ]

        p1 = figure(
            title="Time - SWA",
            width=600,
            height=200,
            x_axis_label='Time',
            y_axis_label='SWA',
            tooltips=TOOLTIPS
        )

        p2 = figure(
            title="Time - Throttle",
            width=600,
            height=200,
            x_range=p1.x_range,
            x_axis_label='Time',
            y_axis_label='Throttle',
            tooltips=TOOLTIPS
        )

        p3 = figure(
            title="Time - Brake",
            width=600,
            height=200,
            x_range=p2.x_range,
            x_axis_label='Time',
            y_axis_label='Brake',
            tooltips=TOOLTIPS
        )

        p1.line(df.iloc[:, 0], df.iloc[:, 1], legend="SWA")
        p2.line(df.iloc[:, 0], df.iloc[:, 2], legend="Throttle")
        p3.line(df.iloc[:, 0], df.iloc[:, 3], legend="Brake")
        # p = gridplot([[p1,p2,p3]])
        # show(p)
        # NOTE: a Bokeh model may appear only once in a layout, so two tabs
        # cannot share p1-p3; each tab needs its own figures.
        first = Panel(child=gridplot([[p1, p2], [p3, None]]), title='first')
        tabs = Tabs(tabs=[first])
        # layout = gridplot([[p1,p2],[p3,None]])
        # layout = row(column(p1, p2), p3)
        show(tabs)
Example 32
        def wrapper(*args, **kwargs):
            reset_output()
            docname = prefix + str(uuid.uuid4())
            session = Session(name=url, root_url=url)
            session.use_doc(docname)
            session.load_document(curdoc())
            session.publish()
            curdoc().autoadd = False
            curdoc().autostore = False

            obj = func(*args, **kwargs)
            tag = embed.autoload_server(obj, session, public=True)
            obj._tag = tag

            curdoc().add(obj)
            changed = session.store_document(curdoc())

            logger.debug("stored: %s", str(changed))

            return obj
Example 33
    def test_replot_remove(self):
        ar_downsample._loadAR()
        reset_output()
        sess = Session(client=app.test_client())
        output_server('Census', session=sess)
        source = ServerDataSource(
            expr={'op': 'Field', 'args': [':leaf', 'bivariate']}
        )
        plot = figure()
        plot.square('A', 'B', source=source)
        ar_downsample.replot(plot, remove_original=False)

        self.assertTrue(plot in curdoc().context.children, "Not retained")
        ar_downsample.replot(plot, remove_original=True)
        self.assertTrue(plot not in curdoc().context.children, "Not removed")

        try:
            ar_downsample.replot(plot, remove_original=True)
        except Exception:
            self.assertTrue(False, "Error replotting a plot not in curdoc")
Example 34
    def test_contour_recipe(self):
        ar_downsample._loadAR()
        reset_output()
        sess = Session(client=app.test_client())
        output_server('Census', session=sess)
        source = ServerDataSource(
            expr={'op': 'Field', 'args': [':leaf', 'bivariate']}
        )
        plot = figure(plot_width=600,
                      plot_height=400,
                      title="Test Title")
        plot.square('A', 'B', source=source)

        plot2 = ar_downsample.contours(plot, title="Contour")
        source2 = self._find_source(plot2)

        self.assertEquals("Contour", plot2.title)
        self.assertEquals(type(source2), ServerDataSource)

        transform = source2.transform
        self.assertEquals(type(transform['info']), ar_downsample.Const)
        self.assertEquals(type(transform['agg']), ar_downsample.Count)
        self.assertEquals(type(transform['shader']), ar_downsample.Seq)
        self.assertEquals(transform['shader'].out, "multi_line")
Example 36
	def TopAgencyforEachzipCode(self,mapPoints,dat):

		"""This method is to create a choropleth map for NYC in which the shape color for each zipcode represents its 
		top agency in number of complaints."""
		reset_output()
		plot = figure()
		polygons = {'lat_list': [], 'lng_list': [], 'color_list': []}  # dict holding the polygon data
		record_index = 0
		zipCodes = []
		longitudes = []
		latitudes = []
		agencies_names = []
		complaint_count = []
		colors = {'NYPD': '#7f0000', 'DOT': '#fee8c8', 'DEP': '#fdd49e', 'DPR': '#fdbb84',
		          'HPD': '#fc8d59', 'FDNY': '#ef6548', 'DOHMH': '#d73000', 'TLC': '#b30000'}
	
	
		for r in dat.iterRecords():
		
			currentZip = r[0]
			# make sure the data type stays consistent
			intzip = int(currentZip)

			if intzip in self.zipBoroughdata:
				zipCodes.append(intzip)
				# get the shape for this zip
				shape = dat.shapeRecord(record_index).shape
				points = shape.points
				lngs = [p[0] for p in points]
				lats = [p[1] for p in points]

				#store lat/lng for current zip shape
				polygons['lng_list'].append(lngs)
				polygons['lat_list'].append(lats)
				longitudes.append(lngs)
				latitudes.append(lats)
				# pick this zip's color from its top agency (ranked by complaint count)
				if currentZip in mapPoints['zip_complaints']:
					sortedlist = sorted(mapPoints['zip_complaints'][currentZip].items(),key = operator.itemgetter(1),reverse = True)
					agency = sortedlist[0][0]
					complaints = sortedlist[0][1]
					##print zipcode,agency
					if agency in colors:
						agencies_names.append(agency)
						complaint_count.append(complaints)
						color = colors[agency]
					else:
						agencies_names.append('NA')
						complaint_count.append('NA')
						color = 'white'
				else:
					color = 'white'
					agencies_names.append('NA')
					complaint_count.append('NA')
				polygons['color_list'].append(color)
			record_index += 1
		output_file('TopAgencyForEachZipcode.html', title="TopAgencyForZipCode")
		TOOLS  = "pan,wheel_zoom,box_zoom,reset,hover,previewsave"
		source = ColumnDataSource(
			data = dict(
				longitudes = longitudes,
				latitudes = latitudes,
				agencies_names = agencies_names,
				complaint_count = complaint_count,
				zipCodes = zipCodes
				)
			)

		##create the polygons
		patches(polygons['lng_list'],polygons['lat_list'],\
			fill_color = polygons['color_list'],line_color = 'gray',\
			tools = TOOLS,plot_width = 1100, plot_height = 700,\
			title = 'Agency with top number of Complaints according to Zip Codes',source = source)
		hover = curplot().select(dict(type = HoverTool))
		hover.tooltips = OrderedDict([("Zip Code","@zipCodes"),("Top Agency Name","@agencies_names"),("Complaints","@complaint_count")])
		hold()
		x,y = -74.2,40.7
		for agency in colors:
			rect([x+0.01],[y],color = colors[agency],width = 0.01,height =.02)
			text([x],[y],text = agency,angle = 0,text_font_size = "8pt",font_weight = "bold",text_align = "right",text_baseline = "middle")
			y = y + .02
		show()
Example 37

def calculate_proxy(snp,pop,request,r2_d="r2"):
	import csv,json,operator,os,sqlite3,subprocess,sys,time
	from multiprocessing.dummy import Pool
	start_time=time.time()

	# Set data directories
	data_dir="/local/content/ldlink/data/"
	gene_dir=data_dir+"refGene/sorted_refGene.txt.gz"
	recomb_dir=data_dir+"recomb/genetic_map_autosomes_combined_b37.txt.gz"
	snp_dir=data_dir+"snp142/snp142_annot_2.db"
	pop_dir=data_dir+"1000G/Phase3/samples/"
	vcf_dir=data_dir+"1000G/Phase3/genotypes/ALL.chr"
	tmp_dir="./tmp/"
	
	
	# Ensure tmp directory exists
	if not os.path.exists(tmp_dir):
		os.makedirs(tmp_dir)


	# Create JSON output
	out_json=open(tmp_dir+'proxy'+request+".json","w")
	output={}


	# Find coordinates (GRCh37/hg19) for SNP RS number
	# Connect to snp142 database
	conn=sqlite3.connect(snp_dir)
	conn.text_factory=str
	cur=conn.cursor()
	
	def get_coords(rs):
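		# snp142 is sharded into tables tbl_0..tbl_9 by the last digit of the rs id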
		id=rs.strip("rs")
		t=(id,)
		cur.execute("SELECT * FROM tbl_"+id[-1]+" WHERE id=?", t)
		return cur.fetchone()

	# Find RS number in snp142 database
	snp_coord=get_coords(snp)
	
	# Close snp142 connection
	cur.close()
	conn.close()
	
	if snp_coord==None:
		output["error"]=snp+" is not in dbSNP build 142."
		json_output=json.dumps(output, sort_keys=True, indent=2)
		print >> out_json, json_output
		out_json.close()
		return("","")
		raise
	
	
	
	# Select desired ancestral populations
	pops=pop.split("+")
	pop_dirs=[]
	for pop_i in pops:
		if pop_i in ["ALL","AFR","AMR","EAS","EUR","SAS","ACB","ASW","BEB","CDX","CEU","CHB","CHS","CLM","ESN","FIN","GBR","GIH","GWD","IBS","ITU","JPT","KHV","LWK","MSL","MXL","PEL","PJL","PUR","STU","TSI","YRI"]:
			pop_dirs.append(pop_dir+pop_i+".txt")
		else:
			output["error"]=pop_i+" is not an ancestral population. Choose one of the following ancestral populations: AFR, AMR, EAS, EUR, or SAS; or one of the following sub-populations: ACB, ASW, BEB, CDX, CEU, CHB, CHS, CLM, ESN, FIN, GBR, GIH, GWD, IBS, ITU, JPT, KHV, LWK, MSL, MXL, PEL, PJL, PUR, STU, TSI, or YRI."
			json_output=json.dumps(output, sort_keys=True, indent=2)
			print >> out_json, json_output
			out_json.close()
			return("","")
			raise

	get_pops="cat "+" ".join(pop_dirs)+" > "+tmp_dir+"pops_"+request+".txt"
	subprocess.call(get_pops, shell=True)


	# Get population ids
	pop_list=open(tmp_dir+"pops_"+request+".txt").readlines()
	ids=[]
	for i in range(len(pop_list)):
		ids.append(pop_list[i].strip())

	pop_ids=list(set(ids))


	# Extract query SNP phased genotypes
	vcf_file=vcf_dir+snp_coord[1]+".phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"
	
	tabix_snp_h="tabix -H {0} | grep CHROM".format(vcf_file)
	proc_h=subprocess.Popen(tabix_snp_h, shell=True, stdout=subprocess.PIPE)
	head=proc_h.stdout.readlines()[0].strip().split()
	
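	# query the single base position ({2}-{2}); grep -v END drops structural variant records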
	tabix_snp="tabix {0} {1}:{2}-{2} | grep -v -e END > {3}".format(vcf_file, snp_coord[1], snp_coord[2], tmp_dir+"snp_no_dups_"+request+".vcf")
	subprocess.call(tabix_snp, shell=True)


	# Check SNP is in the 1000G population, has the correct RS number, and not monoallelic 
	vcf=open(tmp_dir+"snp_no_dups_"+request+".vcf").readlines()
	
	if len(vcf)==0:
		output["error"]=snp+" is not in 1000G reference panel."
		json_output=json.dumps(output, sort_keys=True, indent=2)
		print >> out_json, json_output
		out_json.close()
		subprocess.call("rm "+tmp_dir+"pops_"+request+".txt", shell=True)
		subprocess.call("rm "+tmp_dir+"*"+request+"*.vcf", shell=True)
		return("","")
		raise
	elif len(vcf)>1:
		geno=[]
		for i in range(len(vcf)):
			if vcf[i].strip().split()[2]==snp:
				geno=vcf[i].strip().split()
		if geno==[]:
			output["error"]=snp+" is not in 1000G reference panel."
			json_output=json.dumps(output, sort_keys=True, indent=2)
			print >> out_json, json_output
			out_json.close()
			subprocess.call("rm "+tmp_dir+"pops_"+request+".txt", shell=True)
			subprocess.call("rm "+tmp_dir+"*"+request+"*.vcf", shell=True)
			return("","")
			raise
	else:
		geno=vcf[0].strip().split()
	
	if geno[2]!=snp:
		output["warning"]="Genomic position for query variant ("+snp+") does not match RS number at 1000G position ("+geno[2]+")"
		snp=geno[2]
		
	if "," in geno[3] or "," in geno[4]:
		output["error"]=snp+" is not a biallelic variant."
		json_output=json.dumps(output, sort_keys=True, indent=2)
		print >> out_json, json_output
		out_json.close()
		subprocess.call("rm "+tmp_dir+"pops_"+request+".txt", shell=True)
		subprocess.call("rm "+tmp_dir+"*"+request+"*.vcf", shell=True)
		return("","")
		raise
	
	
	index=[]
	for i in range(9,len(head)):
		if head[i] in pop_ids:
			index.append(i)

	genotypes={"0":0, "1":0}
	for i in index:
		sub_geno=geno[i].split("|")
		for j in sub_geno:
			if j in genotypes:
				genotypes[j]+=1
			else:
				genotypes[j]=1

	if genotypes["0"]==0 or genotypes["1"]==0:
		output["error"]=snp+" is monoallelic in the "+pop+" population."
		json_output=json.dumps(output, sort_keys=True, indent=2)
		print >> out_json, json_output
		out_json.close()
		subprocess.call("rm "+tmp_dir+"pops_"+request+".txt", shell=True)
		subprocess.call("rm "+tmp_dir+"*"+request+"*.vcf", shell=True)
		return("","")
		raise
	
	
	# Define window of interest around query SNP
	window=500000
	coord1=int(snp_coord[2])-window
	if coord1<0:
		coord1=0
	coord2=int(snp_coord[2])+window
	print ""


	# Calculate proxy LD statistics in parallel
	threads=4
	block=(2*window)/threads
	commands=[]
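	# split the 1 Mb window into one contiguous sub-region per thread, each handled by its own LDproxy_sub.py call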
	for i in range(threads):
		if i==min(range(threads)) and i==max(range(threads)):
			command="python LDproxy_sub.py "+snp+" "+snp_coord[1]+" "+str(coord1)+" "+str(coord2)+" "+request+" "+str(i)
		elif i==min(range(threads)):
			command="python LDproxy_sub.py "+snp+" "+snp_coord[1]+" "+str(coord1)+" "+str(coord1+block)+" "+request+" "+str(i)
		elif i==max(range(threads)):
			command="python LDproxy_sub.py "+snp+" "+snp_coord[1]+" "+str(coord1+(block*i)+1)+" "+str(coord2)+" "+request+" "+str(i)
		else:
			command="python LDproxy_sub.py "+snp+" "+snp_coord[1]+" "+str(coord1+(block*i)+1)+" "+str(coord1+(block*(i+1)))+" "+request+" "+str(i)
		commands.append(command)

	processes=[subprocess.Popen(command, shell=True, stdout=subprocess.PIPE) for command in commands]
	
	# collect output in parallel
	def get_output(process):
		return process.communicate()[0].splitlines()

	pool = Pool(len(processes))
	out_raw=pool.map(get_output, processes)
	pool.close()
	pool.join()


	# Aggregate output
	out_prox=[]
	for i in range(len(out_raw)):
		for j in range(len(out_raw[i])):
			col=out_raw[i][j].strip().split("\t")
			col[6]=int(col[6])
			col[7]=float(col[7])
			col[8]=float(col[8])
			col.append(abs(int(col[6])))
			out_prox.append(col)
	
		
	# Sort output
	if r2_d not in ["r2","d"]:
		if "warning" in output:
			output["warning"]=output["warning"]+". "+r2_d+" is not an acceptable value for r2_d (r2 or d required). r2 is used by default"
		else:
			output["warning"]=r2_d+" is not an acceptable value for r2_d (r2 or d required). r2 is used by default"
		r2_d="r2"
	
	
	out_dist_sort=sorted(out_prox, key=operator.itemgetter(14))
	if r2_d=="r2":
		out_ld_sort=sorted(out_dist_sort, key=operator.itemgetter(8), reverse=True)
	else:
		out_ld_sort=sorted(out_dist_sort, key=operator.itemgetter(7), reverse=True)


	# Populate JSON and text output
	outfile=open(tmp_dir+"proxy"+request+".txt","w")
	header=["RS_Number","Coord","Alleles","MAF","Distance","Dprime","R2","Correlated_Alleles","RegulomeDB","Function"]
	print >> outfile, "\t".join(header)
	
	track=open(tmp_dir+"track"+request+".txt","w")
	print >> track, "browser position chr"+str(snp_coord[1])+":"+str(coord1)+"-"+str(coord2)
	print >> track, ""
	print >> track, "track name=\""+snp+"\" description=\"Query Variant: "+snp+"\" color=108,108,255"
	
	query_snp={}
	query_snp["RS"]=out_ld_sort[0][3]
	query_snp["Alleles"]=out_ld_sort[0][1]
	query_snp["Coord"]=out_ld_sort[0][2]
	query_snp["Dist"]=out_ld_sort[0][6]
	query_snp["Dprime"]=str(round(float(out_ld_sort[0][7]),4))
	query_snp["R2"]=str(round(float(out_ld_sort[0][8]),4))
	query_snp["Corr_Alleles"]=out_ld_sort[0][9]
	query_snp["RegulomeDB"]=out_ld_sort[0][10]
	query_snp["MAF"]=str(round(float(out_ld_sort[0][11]),4))
	query_snp["Function"]=out_ld_sort[0][13]

	output["query_snp"]=query_snp
	
	temp=[query_snp["RS"],query_snp["Coord"],query_snp["Alleles"],query_snp["MAF"],str(query_snp["Dist"]),str(query_snp["Dprime"]),str(query_snp["R2"]),query_snp["Corr_Alleles"],query_snp["RegulomeDB"],query_snp["Function"]]
	print >> outfile, "\t".join(temp)
	
	chr,pos=query_snp["Coord"].split(':')
	temp2=[chr,pos,pos,query_snp["RS"]]
	print >> track, "\t".join(temp2)
	print >> track, ""
	if r2_d=="r2":
		print >> track, "track name=\"0.8<R2<1.0\" description=\"Proxy Variants with 0.8<R2<1.0\" color=198,129,0"
	else:
		print >> track, "track name=\"0.8<D'<1.0\" description=\"Proxy Variants with 0.8<D'<1.0\" color=198,129,0"
	
	
	
	proxies={}
	rows=[]
	digits=len(str(len(out_ld_sort)))
	r2_d_prior=1
	counter=0
	cutoff=[0.8,0.6,0.4,0.2,0.0]
	
	for i in range(1,len(out_ld_sort)):
		if float(out_ld_sort[i][8])>0.01 and out_ld_sort[i][3]!=snp:
			proxy_info={}
			row=[]
			proxy_info["RS"]=out_ld_sort[i][3]
			proxy_info["Alleles"]=out_ld_sort[i][4]
			proxy_info["Coord"]=out_ld_sort[i][5]
			proxy_info["Dist"]=out_ld_sort[i][6]
			proxy_info["Dprime"]=str(round(float(out_ld_sort[i][7]),4))
			proxy_info["R2"]=str(round(float(out_ld_sort[i][8]),4))
			proxy_info["Corr_Alleles"]=out_ld_sort[i][9]
			proxy_info["RegulomeDB"]=out_ld_sort[i][10]
			proxy_info["MAF"]=str(round(float(out_ld_sort[i][12]),4))
			proxy_info["Function"]=out_ld_sort[i][13]
			proxies["proxy_"+(digits-len(str(i)))*"0"+str(i)]=proxy_info
			chr,pos=proxy_info["Coord"].split(':')
			
			# Adding a row for the Data Table
			row.append(proxy_info["RS"])
			row.append(chr)
			row.append(pos)
			row.append(proxy_info["Alleles"])
			row.append(str(round(float(proxy_info["MAF"]),4)))
			row.append(proxy_info["Dist"])
			row.append(str(round(float(proxy_info["Dprime"]),4)))
			row.append(str(round(float(proxy_info["R2"]),4)))
			row.append(proxy_info["Corr_Alleles"])
			row.append(proxy_info["RegulomeDB"])
			row.append("HaploReg link")
			row.append(proxy_info["Function"])
			rows.append(row)
			
			temp=[proxy_info["RS"],proxy_info["Coord"],proxy_info["Alleles"],proxy_info["MAF"],str(proxy_info["Dist"]),str(proxy_info["Dprime"]),str(proxy_info["R2"]),proxy_info["Corr_Alleles"],proxy_info["RegulomeDB"],proxy_info["Function"]]
			print >> outfile, "\t".join(temp)
			
			temp2=[chr,pos,pos,proxy_info["RS"]]
			print >> track, "\t".join(temp2)
			
			if r2_d=="r2" and cutoff[counter]<r2_d_prior and float(proxy_info["R2"])<=cutoff[counter]:
				print >> track, ""
				print >> track, "track name=\""+str(cutoff[counter+1])+"<R2<"+str(cutoff[counter])+"\" description=\"Proxy Variants with "+str(cutoff[counter+1])+"<R2<"+str(cutoff[counter])+"\" color=198,129,0"
				counter+=1
			elif r2_d=="d" and cutoff[counter]<r2_d_prior and float(proxy_info["Dprime"])<=cutoff[counter]:
				print >> track, ""
				print >> track, "track name=\""+str(cutoff[counter+1])+"<D'<"+str(cutoff[counter])+"\" description=\"Proxy Variants with "+str(cutoff[counter+1])+"<D'<"+str(cutoff[counter])+"\" color=198,129,0"
				counter+=1
			
			if r2_d=="r2":
				r2_d_prior=proxy_info["R2"]
			else:
				r2_d_prior=proxy_info["Dprime"]

	output["aaData"]=rows
	output["proxy_snps"]=proxies
	
	
	# Output JSON and text file
	json_output=json.dumps(output, sort_keys=True, indent=2)
	print >> out_json, json_output
	out_json.close()
	
	outfile.close()
	track.close()
	
	
	# Organize scatter plot data
	q_rs=[]
	q_allele=[]
	q_coord=[]
	q_maf=[]
	p_rs=[]
	p_allele=[]
	p_coord=[]
	p_maf=[]
	dist=[]
	d_prime=[]
	d_prime_round=[]
	r2=[]
	r2_round=[]
	corr_alleles=[]
	regdb=[]
	funct=[]
	color=[]
	size=[]
	for i in range(len(out_ld_sort)):
		q_rs_i,q_allele_i,q_coord_i,p_rs_i,p_allele_i,p_coord_i,dist_i,d_prime_i,r2_i,corr_alleles_i,regdb_i,q_maf_i,p_maf_i,funct_i,dist_abs=out_ld_sort[i]
		
		if float(r2_i)>0.01:
			q_rs.append(q_rs_i)
			q_allele.append(q_allele_i)
			q_coord.append(float(q_coord_i.split(":")[1])/1000000)
			q_maf.append(str(round(float(q_maf_i),4)))
			if p_rs_i==".":
				p_rs_i=p_coord_i
			p_rs.append(p_rs_i)
			p_allele.append(p_allele_i)
			p_coord.append(float(p_coord_i.split(":")[1])/1000000)
			p_maf.append(str(round(float(p_maf_i),4)))
			dist.append(str(round(dist_i/1000000.0,4)))
			d_prime.append(float(d_prime_i))
			d_prime_round.append(str(round(float(d_prime_i),4)))
			r2.append(float(r2_i))
			r2_round.append(str(round(float(r2_i),4)))
			corr_alleles.append(corr_alleles_i)
			
			# Correct Missing Annotations
			if regdb_i==".":
				regdb_i=""
			regdb.append(regdb_i)
			if funct_i==".":
				funct_i=""
			if funct_i=="NA":
				funct_i="none"
			funct.append(funct_i)
			
			# Set Color
			if i==0:
				color_i="blue"
			elif funct_i!="none" and funct_i!="":
				color_i="red"
			else:
				color_i="orange"
			color.append(color_i)
			
			# Set Size
			size_i=9+float(p_maf_i)*14.0
			size.append(size_i)
	
	
	# Begin Bokeh Plotting
	from collections import OrderedDict
	from bokeh.embed import components,file_html
	from bokeh.models import HoverTool,LinearAxis,Range1d
	from bokeh.plotting import ColumnDataSource,curdoc,figure,output_file,reset_output,save
	from bokeh.resources import CDN
	
	reset_output()
	
	source=ColumnDataSource(
		data=dict(
			qrs=q_rs,
			q_alle=q_allele,
			q_maf=q_maf,
			prs=p_rs,
			p_alle=p_allele,
			p_maf=p_maf,
			dist=dist,
			r=r2_round,
			d=d_prime_round,
			alleles=corr_alleles,
			regdb=regdb,
			funct=funct,
		)
	)
	
	
	# Proxy Plot
	x=p_coord
	if r2_d=="r2":
		y=r2
	else:
		y=d_prime
	whitespace=0.01
	xr=Range1d(start=coord1/1000000.0-whitespace, end=coord2/1000000.0+whitespace)
	yr=Range1d(start=-0.03, end=1.03)
	sup_2=u"\u00B2"

	proxy_plot=figure(
				title="Proxies for "+snp+" in "+pop,
				min_border_top=2, min_border_bottom=2, min_border_left=60, min_border_right=60, h_symmetry=False, v_symmetry=False,
				plot_width=900,
				plot_height=600,
				x_range=xr, y_range=yr,
				tools="hover,tap,pan,box_zoom,box_select,reset,previewsave", logo=None,
				toolbar_location="above")
	
	tabix_recomb="tabix -fh {0} {1}:{2}-{3} > {4}".format(recomb_dir, snp_coord[1], coord1-whitespace, coord2+whitespace, tmp_dir+"recomb_"+request+".txt")
	subprocess.call(tabix_recomb, shell=True)
	filename=tmp_dir+"recomb_"+request+".txt"
	recomb_raw=open(filename).readlines()
	recomb_x=[]
	recomb_y=[]
	for i in range(len(recomb_raw)):
		chr,pos,rate=recomb_raw[i].strip().split()
		recomb_x.append(int(pos)/1000000.0)
		recomb_y.append(float(rate)/100.0)
	
	proxy_plot.line(recomb_x, recomb_y, line_width=2, color="black", alpha=0.5)
	
	proxy_plot.circle(x, y, size=size, source=source, color=color, alpha=0.5)
	
	hover=proxy_plot.select(dict(type=HoverTool))
	hover.tooltips=OrderedDict([
		("Query SNP", "@qrs @q_alle"),
		("Proxy SNP", "@prs @p_alle"),
		("Distance (Mb)", "@dist"),
		("MAF (Query,Proxy)", "@q_maf,@p_maf"),
		("R"+sup_2, "@r"),
		("D\'", "@d"),
		("Correlated Alleles", "@alleles"),
		("RegulomeDB", "@regdb"),
		("Functional Class", "@funct"),
	])
	
	proxy_plot.text(x, y, text=regdb, alpha=1, text_font_size="7pt",
					text_baseline="middle", text_align="center", angle=0)
	
	if r2_d=="r2":
		proxy_plot.yaxis.axis_label="R"+sup_2
	else:
		proxy_plot.yaxis.axis_label="D\'"
	
	proxy_plot.extra_y_ranges = {"y2_axis": Range1d(start=-3, end=103)}
	proxy_plot.add_layout(LinearAxis(y_range_name="y2_axis", axis_label="Combined Recombination Rate (cM/Mb)"), "right")
	
	
	# Rug Plot
	y2_ll=[-0.03]*len(x)
	y2_ul=[1.03]*len(x)
	yr_rug=Range1d(start=-0.03, end=1.03)
	
	rug=figure(
			x_range=xr, y_range=yr_rug, border_fill='white', y_axis_type=None,
			title="", min_border_top=2, min_border_bottom=2, min_border_left=60, min_border_right=60, h_symmetry=False, v_symmetry=False,
			plot_width=900, plot_height=50, tools="xpan,tap")

	rug.segment(x, y2_ll, x, y2_ul, source=source, color=color, alpha=0.5, line_width=1)
	rug.toolbar_location=None
	
	
	# Gene Plot
	tabix_gene="tabix -fh {0} {1}:{2}-{3} > {4}".format(gene_dir, snp_coord[1], coord1, coord2, tmp_dir+"genes_"+request+".txt")
	subprocess.call(tabix_gene, shell=True)
	filename=tmp_dir+"genes_"+request+".txt"
	genes_raw=open(filename).readlines()
	
	genes_plot_start=[]
	genes_plot_end=[]
	genes_plot_y=[]
	genes_plot_name=[]
	exons_plot_x=[]
	exons_plot_y=[]
	exons_plot_w=[]
	exons_plot_h=[]
	exons_plot_name=[]
	exons_plot_id=[]
	exons_plot_exon=[]
	lines=[0]
	gap=80000
	tall=0.75
	if genes_raw!=None:
		for i in range(len(genes_raw)):
			bin,name_id,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds,score,name2,cdsStartStat,cdsEndStat,exonFrames=genes_raw[i].strip().split()
			name=name2
			id=name_id
			e_start=exonStarts.split(",")
			e_end=exonEnds.split(",")
			
			# Determine Y Coordinate
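			# Greedy first-fit packing: place the transcript on the first row
			# whose last-placed gene ends at least `gap` bp before txStart;
			# if no existing row fits, open a new one.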
			i=0
			y_coord=None
			while y_coord==None:
				if i>len(lines)-1:
					y_coord=i+1
					lines.append(int(txEnd))
				elif int(txStart)>(gap+lines[i]):
					y_coord=i+1
					lines[i]=int(txEnd)
				else:
					i+=1
			
			genes_plot_start.append(int(txStart)/1000000.0)
			genes_plot_end.append(int(txEnd)/1000000.0)
			genes_plot_y.append(y_coord)
			genes_plot_name.append(name+"  ")
			
			for i in range(len(e_start)-1):
				if strand=="+":
					exon=i+1
				else:
					exon=len(e_start)-1-i
				
				width=(int(e_end[i])-int(e_start[i]))/1000000.0
				x_coord=int(e_start[i])/1000000.0+(width/2)
				
				exons_plot_x.append(x_coord)
				exons_plot_y.append(y_coord)
				exons_plot_w.append(width)
				exons_plot_h.append(tall)
				exons_plot_name.append(name)
				exons_plot_id.append(id)
				exons_plot_exon.append(exon)


	n_rows=len(lines)
	genes_plot_yn=[n_rows-x+0.5 for x in genes_plot_y]
	exons_plot_yn=[n_rows-x+0.5 for x in exons_plot_y]
	yr2=Range1d(start=0, end=n_rows)
	
	source2=ColumnDataSource(
		data=dict(
			exons_plot_name=exons_plot_name,
			exons_plot_id=exons_plot_id,
			exons_plot_exon=exons_plot_exon,
		)
	)
	
	if len(lines)<3:
		plot_h_pix=150
	else:
		plot_h_pix=150+(len(lines)-2)*50
	
	gene_plot=figure(
					x_range=xr, y_range=yr2, border_fill='white', 
					title="", min_border_top=2, min_border_bottom=2, min_border_left=60, min_border_right=60, h_symmetry=False, v_symmetry=False,
					plot_width=900, plot_height=plot_h_pix, tools="hover,tap,xpan,box_zoom,reset,previewsave", logo=None)
					
	gene_plot.segment(genes_plot_start, genes_plot_yn, genes_plot_end, genes_plot_yn, color="black", alpha=1, line_width=2)
	gene_plot.rect(exons_plot_x, exons_plot_yn, exons_plot_w, exons_plot_h, source=source2, fill_color="grey", line_color="grey")
	gene_plot.xaxis.axis_label="Chromosome "+snp_coord[1]+" Coordinate (Mb)(GRCh37)"
	gene_plot.yaxis.axis_label="Genes"
	gene_plot.ygrid.grid_line_color=None
	gene_plot.yaxis.axis_line_color=None
	gene_plot.yaxis.minor_tick_line_color=None
	gene_plot.yaxis.major_tick_line_color=None
	gene_plot.yaxis.major_label_text_color=None
	
	hover=gene_plot.select(dict(type=HoverTool))
	hover.tooltips=OrderedDict([
		("Gene", "@exons_plot_name"),
		("ID", "@exons_plot_id"),
		("Exon", "@exons_plot_exon"),
	])
	
	gene_plot.text(genes_plot_start, genes_plot_yn, text=genes_plot_name, alpha=1, text_font_size="7pt",
		 text_font_style="bold", text_baseline="middle", text_align="right", angle=0)
	

	gene_plot.toolbar_location="below"
	
	
	#html=file_html(curdoc(), CDN, "Test Plot")
	#out_html=open("LDproxy.html","w")
	#print >> out_html, html
	#out_html.close()
	
	out_script,out_div=components(curdoc(), CDN)
	reset_output()

	# Print run time statistics
	pop_list=open(tmp_dir+"pops_"+request+".txt").readlines()
	print "\nNumber of Individuals: "+str(len(pop_list))

	print "SNPs in Region: "+str(len(out_prox))

	duration=time.time() - start_time
	print "Run time: "+str(duration)+" seconds\n"


	# Remove temporary files
	subprocess.call("rm "+tmp_dir+"pops_"+request+".txt", shell=True)
	subprocess.call("rm "+tmp_dir+"*"+request+"*.vcf", shell=True)
	subprocess.call("rm "+tmp_dir+"genes_"+request+".txt", shell=True)
	subprocess.call("rm "+tmp_dir+"recomb_"+request+".txt", shell=True)


	# Return plot output
	return(out_script,out_div)
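The function hands back the script/div pair from bokeh.embed.components so the caller can splice the plots into its own page. As a rough sketch of what a caller might do with that pair (the page shell below is an assumption, not part of the code above; note that the two-argument components(curdoc(), CDN) form used above is from the 0.x API, while current Bokeh takes just the model):

from bokeh.embed import components
from bokeh.plotting import figure
from bokeh.resources import CDN

p = figure(title="demo")
p.circle([1, 2, 3], [3, 1, 2])

script, div = components(p)  # <script> block plus the target <div>

# Hypothetical page shell; BokehJS is loaded from the CDN URL list.
js_tags = "\n".join('<script src="%s"></script>' % u for u in CDN.js_files)
html = "<html><head>%s\n%s</head><body>%s</body></html>" % (js_tags, script, div)

with open("embedded.html", "w") as f:
    f.write(html)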
    def test_default_resources_minified(self):
        plt.output_file("foo.html")
        self.assertEqual(plt._default_file["resources"].minified, True)
        plt.reset_output()
Example 39
def generateCrossFilePlotsForBucket(i, lowerBound, upperBound, navigatorDF,
                                    retFilename):

    global bucketDir;
    global timeUnitString;

    aggregateLegendDict = {};
    figuresForAllFiles = [];
    fileName = bucketDir + "/bucket-" + str(i) + ".html";

    reset_output();

    intervalTitle = "Interval #" + str(i) + ". {:,}".format(lowerBound) + \
                    " to " + "{:,}".format(upperBound) + \
                    " " + timeUnitString + ".";

    # Generate a navigator chart, which shows where we are in the
    # trace and allows moving around the trace.
    #
    navigatorFigure = generateNavigatorFigure(navigatorDF, i, intervalTitle);
    figuresForAllFiles.append(navigatorFigure);

    # Select from the dataframe for this file the records whose 'start'
    # and 'end' timestamps fall within the lower and upper bound.
    #
    for fname in sorted(perFileDataFrame.keys()):

        fileDF = perFileDataFrame[fname];

        # Select operations whose start timestamp falls within
        # the current interval, delimited by lowerBound and upperBound.
        #
        startInBucket = fileDF.loc[(fileDF['start'] >= lowerBound)
                                   & (fileDF['start'] < upperBound)];

        # Select operations whose end timestamp falls within
        # the current interval, delimited by lowerBound and upperBound.
        #
        endInBucket = fileDF.loc[(fileDF['end'] > lowerBound)
                                   & (fileDF['end'] <= upperBound)];

        # Select operations that begin before this interval and end after
        # this interval, but continue throughout this interval. The interval
        # is delimited by lowerBound and upperBound.
        #
        spanBucket = fileDF.loc[(fileDF['start'] < lowerBound)
                                   & (fileDF['end'] > upperBound)];

        frames = [startInBucket, endInBucket, spanBucket];
        bucketDF = pd.concat(frames).drop_duplicates().reset_index(drop=True);

        if (bucketDF.size == 0):
            continue;

        # If the end of the function is outside the interval, let's pretend
        # that it is within the interval, otherwise we won't see any data about
        # it when we hover. This won't have the effect of showing wrong
        # data to the user.
        #
        mask = bucketDF.end >= upperBound;
        bucketDF.loc[mask, 'end'] = upperBound-1;

        # Same adjustment as above if the start of the operation falls outside
        # the interval's lower bound.
        #
        mask = bucketDF.start < lowerBound;
        bucketDF.loc[mask, 'start'] = lowerBound;

        largestStackDepth = bucketDF['stackdepthNext'].max();
        figureTitle = fname;

        figure, legendDict = generateBucketChartForFile(figureTitle, bucketDF,
                                                        largestStackDepth,
                                                        lowerBound, upperBound);
        aggregateLegendDict.update(legendDict);
        figuresForAllFiles.append(figure);

    # Create the legend for this file and insert it after the navigator figure
    if (len(aggregateLegendDict) > 0):
        legendFigure = createLegendFigure(aggregateLegendDict);
        figuresForAllFiles.insert(1, legendFigure);

    save(column(figuresForAllFiles), filename = fileName,
         title=intervalTitle, resources=CDN);

    retFilename.value = fileName;
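The three selections (operations starting in the bucket, ending in it, or spanning it) together cover every record that overlaps the interval, so, assuming well-formed records with end >= start, their union collapses to a single overlap test. A minimal sketch of the equivalent one-mask selection:

import pandas as pd

fileDF = pd.DataFrame({"start": [0, 5, 12, 2], "end": [3, 9, 20, 18]})
lowerBound, upperBound = 4, 10

# A record overlaps [lowerBound, upperBound) iff it starts before the
# upper bound and ends after the lower bound.
bucketDF = fileDF.loc[(fileDF["start"] < upperBound) & (fileDF["end"] > lowerBound)]
print(bucketDF)  # keeps (5, 9) and (2, 18); drops (0, 3) and (12, 20)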
Example 40
def main():

    global arrowLeftImg;
    global arrowRightImg;
    global bucketDir;
    global perFuncDF;
    global targetParallelism;

    configSupplied = False;
    figuresForAllFunctions = [];

    # Set up the argument parser
    #
    parser = argparse.ArgumentParser(description=
                                 'Visualize operation log');
    parser.add_argument('files', type=str, nargs='*',
                        help='log files to process');
    parser.add_argument('-c', '--config', dest='configFile', default='');
    parser.add_argument('-d', '--dumpCleanData', dest='dumpCleanData',
                        default=False, action='store_true',
                        help='Dump clean log data. Clean data will \
                        not include incomplete function call records, \
                        e.g., if there is a function begin record, but\
                        no function end record, or vice versa.');
    parser.add_argument('-j', dest='jobParallelism', type=int,
                        default='0');

    args = parser.parse_args();

    if (len(args.files) == 0):
        parser.print_help();
        sys.exit(1);

    # Determine the target job parallelism
    if (args.jobParallelism > 0):
        targetParallelism = args.jobParallelism;
    else:
        targetParallelism = multiprocessing.cpu_count() * 2;

    # Get names of standard CSS colors that we will use for the legend
    initColorList();

    # Read the configuration file, if supplied.
    if (args.configFile != ''):
        configSupplied = parseConfigFile(args.configFile);

    if (not configSupplied):
        pluralSuffix = "";

        print(color.BLUE + color.BOLD +
              "Will deem as outliers all function instances whose runtime " +
              "was higher than the " + str(PERCENTILE * 100) +
              "th percentile for that function."
              + color.END);


    # Create a directory for the files that display the data summarized
    # in each bucket of the outlier histogram. We call these "bucket files".
    #
    if not os.path.exists(bucketDir):
        os.makedirs(bucketDir);

    # Parallelize this later, so we are working on files in parallel.
    for fname in args.files:
        processFile(fname, args.dumpCleanData);

    # Normalize all intervals by subtracting the first timestamp.
    normalizeIntervalData();

    # Generate plots of time series slices across all files for each bucket
    # in the outlier histogram. Save each cross-file slice to an HTML file.
    #
    fileNameList = generateTSSlicesForBuckets();

    totalFuncs = len(perFuncDF.keys());
    i = 0;
    # Generate a histogram of outlier durations
    for func in sorted(perFuncDF.keys()):
        funcDF = perFuncDF[func];
        figure = createOutlierHistogramForFunction(func, funcDF, fileNameList);
        if (figure is not None):
            figuresForAllFunctions.append(figure);

        i += 1;
        percentComplete = float(i) / float(totalFuncs) * 100;
        print(color.BLUE + color.BOLD + " Generating outlier histograms... "),
        sys.stdout.write("%d%% complete  \r" % (percentComplete) );
        sys.stdout.flush();

    print(color.END);
    reset_output();
    output_file(filename = "WT-outliers.html", title="Outlier histograms");
    show(column(figuresForAllFunctions));
Example 41
def main(nc, save_dir, display=False):
    cf.create_dir(save_dir)

    with xr.open_dataset(nc, mask_and_scale=False) as ds:
        subsite = ds.subsite
        node = ds.node
        sensor = ds.sensor
        stream = ds.stream
        deployment = 'D0000{}'.format(str(np.unique(ds.deployment)[0]))
        t0 = ds.time_coverage_start
        t1 = ds.time_coverage_end
        sub_dir = os.path.join(save_dir, subsite, '{}-{}-{}'.format(subsite, node, sensor), stream, deployment)

        cf.create_dir(sub_dir)

        misc = ['quality', 'string', 'timestamp', 'deployment', 'id', 'provenance', 'qc', 'time', 'mission', 'obs',
                'volt', 'ref', 'sig', 'amp', 'rph', 'calphase', 'phase', 'therm']
        reg_ex = re.compile(r'\b(?:%s)\b' % '|'.join(misc))

        #  keep variables that are not in the regular expression
        vars = [s for s in ds.data_vars if not reg_ex.search(s)]

        x = ds['time'].data

        for v in vars:  # List of dataset variables
            # print v
            # Filter out variables that are strings, datetimes, or qc related
            if ds[v].dtype.kind == 'S' or ds[v].dtype == np.dtype('datetime64[ns]') or 'time' in v or 'qc_results' in v or 'qc_executed' in v:
                continue
            y = ds[v]
            try:
                y_units = y.units
            except AttributeError:
                y_units = None

            y_data = y.data

            if y_data.ndim > 1:
                continue

            source = ColumnDataSource(
                data=dict(
                    x=x,
                    y=y_data,
                )
            )
            gr = cf.get_global_ranges(subsite, node, sensor, v)

            output_file('{}/{}-{}-{}.html'.format(sub_dir, v, ds.time_coverage_start.replace(':', ''), ds.time_coverage_end.replace(':', '')))

            # Toolbar tools for the figure (defined here so the excerpt is
            # self-contained; in the original source this was likely set at
            # module level).
            tools = 'pan,box_zoom,wheel_zoom,reset,save'
            p = figure(width=1200,
                       height=800,
                       title='{}-{}-{}: {} - {} - {}, Stream: {}'.format(subsite, node, sensor, deployment, t0, t1, stream),
                       x_axis_label='Time (GMT)', y_axis_label='{} ({})'.format(v, y_units),
                       x_axis_type='datetime',
                       tools=tools)
            p.line('x', 'y', legend=v, line_width=3, source=source)
            p.circle('x', 'y', fill_color='white', size=4, source=source)
            if gr:
                low_box = BoxAnnotation(top=gr[0], fill_alpha=0.05, fill_color='red')
                mid_box = BoxAnnotation(top=gr[1], bottom=gr[0], fill_alpha=0.1, fill_color='green')
                high_box = BoxAnnotation(bottom=gr[1], fill_alpha=0.05, fill_color='red')
                p.add_layout(low_box)
                p.add_layout(mid_box)
                p.add_layout(high_box)

            if display:
                show(p)
            else:
                save(p)
            reset_output()
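Calling reset_output() at the end of each pass is what keeps every variable's HTML file standalone instead of accumulating plots across iterations. The same pattern in isolation, as a minimal sketch with placeholder data:

from bokeh.plotting import figure, output_file, save, reset_output

series = {"a": ([0, 1, 2], [1, 3, 2]), "b": ([0, 1, 2], [2, 0, 1])}
for name, (xs, ys) in series.items():
    output_file("{}.html".format(name))  # target file for this iteration
    p = figure(title=name)
    p.line(xs, ys)
    save(p)         # write one standalone HTML document
    reset_output()  # clear implicit state before the next iteration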
Example 42
def create_choropleth(output_path, json_file, shade_data_file, palette_colour, output_type, step, min_range, max_range,
                      reverse, dynamic=True):

    reset_output()

    if isinstance(shade_data_file, str):
        results_data = pd.read_csv(shade_data_file)
    else:
        results_data = shade_data_file

    # calculate the maximum number of shades to show in final output if not user specified
    if dynamic:
        min_range = h.rounddown_nearest_ten(np.nanmin(list(results_data.result*100)))
        max_range = h.roundup_nearest_ten(np.nanmax(list(results_data.result*100)))
        step = set_dynamic_step(min_range, max_range)

    # check for a whole number in user defined values - return an error if not
    if not dynamic and ((max_range + step) - min_range) % step != 0:
        raise ValueError("(max_range + step) - min_range must be divisible by step")
    shade_no = int(((max_range + step) - min_range) / step)

    plot_dict = {}  # dict used to store each plot's data - one for each shade to display.

    lower_limit = 0
    for upper_limit in range(min_range, max_range+step, step):
        temp_df = results_data[(results_data['result'] > lower_limit/100) & (results_data['result'] <= upper_limit/100)]
        if len(temp_df.index) > 0:
            plot_dict[str(upper_limit)] = dict(zip(temp_df.district, temp_df.result))
        lower_limit = upper_limit

    # separate geojson file to match the plots above
    geojson_dict = {}  # dict used to store each plots geo data
    delete_list = []  # districts to delete once all with a colour are assigned

    with open(json_file) as base_map:
        map_data = json.load(base_map)

    id_key = 'LAD11CD'  # 'LSOA11CD', 'LAD11CD'

    for key, value in plot_dict.items():

        geojson_list = []

        for feature in map_data['features']:

            if str(feature['properties'][id_key]) in value:

                geojson_list.append(feature)
                # but also remove the feature from the map_data[features] file
                delete_list.append(str(feature['properties'][id_key]))

        geojson_dict[key] = geojson_list

    # if any features have no defined output add them but assign them a zero value.
    map_data['features'] = [feature for feature in map_data['features']
                            if feature['properties'][id_key] not in delete_list]

    # add a corresponding plot for the shade for those 0 values
    if bool(map_data['features']):

        plot_dict['0'] = dict((feature['properties'][id_key], 0) for feature in map_data['features'])
        geojson_dict['0'] = [feature for feature in map_data['features']]

    # create the colour palette to use
    colours = select_palette(shade_no, palette_colour, reverse)

    source_dict = {}  # a dict that will store all the columndatasources
    for key, value in geojson_dict.items():

        define_features(value, plot_dict[key], key, source_dict, min_range, max_range, step, colours, dynamic)

    tools = "pan,wheel_zoom,box_zoom,reset,hover,save"

    title = output_type + " by LA"

    p = figure(width=900, height=900, title=title, tools=tools)

    for key in sorted(source_dict.keys(), key=int, reverse=True):

        p.patches('x', 'y', source=source_dict[key],
                  fill_color='color', fill_alpha=0.7,
                  line_color="white", line_width=0.15, legend=str(key))

    hover = p.select_one(HoverTool)
    hover.point_policy = "follow_mouse"
    hover.tooltips = [
        ("Name", "@name"),
        (output_type, "@rate%"),
        ("Code", "@code"),
    ]

    output_dir = os.path.join(output_path, "charts")

    if os.path.isdir(output_dir) is False:
        os.mkdir(output_dir)

    suffix = '.html'
    output_filename = os.path.join(output_type + suffix)

    output_file_path = os.path.join(output_dir, output_filename)

    output_file(output_file_path, title=title, mode='inline')
    save(p)
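For reference, a hypothetical call to the function above; the file names are assumptions, and the shade file is assumed to contain the district and result columns the code reads:

create_choropleth(output_path="out",
                  json_file="lad_boundaries.geojson",  # assumed boundary file
                  shade_data_file="results.csv",       # assumed results file
                  palette_colour="Blues",
                  output_type="Return rate",
                  step=10, min_range=0, max_range=100,
                  reverse=False,
                  dynamic=True)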
Example 43
    def generate_interactive_bokeh_plot(self, subset, name, savelocation, annots=None, plus=None, portability="web"):

        """
        Generates interactive bokeh plots along with (optional) annotation and enrichment reports.
        """

        # PLOT CONFIG (NOTE: maybe expose (init) some of the configs later? ex. axis labels, sizes, etc.)
        # ----------------------------------------------------------------------------------------------------------
        TOOLS = "pan,wheel_zoom,box_zoom,reset,save,box_select,resize"

        plot = figure(tools=TOOLS, x_axis_label="Time points (h)", 
                                   y_axis_label="normalized expression counts")

        plot.plot_width = 800
        plot.plot_height = 800

        plot.title = name

        plot.title_text_font_size = "18pt"
        plot.title_text_color = "olive"   
        plot.title_text_font = "times"
        plot.title_text_font_style = "italic"

        # AXIS (hardcoded)
        plot.xaxis[0].ticker=FixedTicker(ticks=self.ticks)
        plot.xaxis.bounds = (0, self.timepoints[-1])

        plot.xaxis.major_label_orientation = pi/float(2.5)
        # ----------------------------------------------------------------------------------------------------------

        colour_list = toolbox.get_spaced_colors(len(subset))
        c = 0

        for gene in subset.keys():

            if annots != None:
                labelextra = [annots[gene][0].strip() for x_, y_ in zip(self.timepoints, subset[gene])]
            else:
                labelextra = ["" for x_, y_ in zip(self.timepoints, subset[gene])]


            cds = ColumnDataSource(
                data=dict(
                    x=self.timepoints,
                    y=subset[gene],
                    label=[gene for x_, y_ in zip(self.timepoints, subset[gene])],
                    labelextra = labelextra
                )
            )

            # PLOTARAMA
            plot.line("x","y", source=cds, color=colour_list[c])
            circle = Circle(x='x', y='y', line_color=None, fill_color=colour_list[c])
            c += 1
            circle_renderer = plot.add_glyph(cds, circle)


            # HOVER control
            # -----------------------------------------------------------------------------------
            if annots != None:

                if annots[gene][0].strip() != "": 

                    tooltips = """
                                <div style="width:350px">
                                <b> @label </b><br/>
                                <i><u>annotations:</u></i><br/> @labelextra    
                                </div>
                    """
                else:
                    tooltips = """
                            <div style="width:350px">
                            <b> @label </b><br/>
                            </div>
                    """


            else:
                tooltips = """
                            <div style="width:350px">
                            <b> @label </b><br/>
                            </div>
                """

            plot.add_tools( HoverTool(tooltips=tooltips, renderers=[circle_renderer]))

        # ====================================================== #
        #                        TEMPLATING                      #
        # ====================================================== #

        script, div = components(plot)


        # process "name" here to get "title"
        title = name
 

        # If an annotation or enrichment dictionary is not supplied w/ the
        # methods parameters feed a blank one to the html template renderer.
        if annots == None:
            annots = {}

        if plus is None:
            plus = {}

        # Create the main html page scaffold 
        scaffold = templater.Templater(script, div, title, annots)


        # ====================================================
        if portability == "batch":
            path = os.path.join(savelocation, "static/")
            try: 
                os.makedirs(path)
            except OSError:
                if not os.path.isdir(path):
                    raise

            cssfile = urllib.URLopener()
            cssfile.retrieve("https://cdn.pydata.org/bokeh/release/bokeh-0.11.1.min.css",
                              os.path.join(path, "bokeh-0.11.1.min.css"))
            jsfile = urllib.URLopener()
            jsfile.retrieve("https://cdn.pydata.org/bokeh/release/bokeh-0.11.1.min.js",
                             os.path.join(path, "bokeh-0.11.1.min.js"))



        # If a dictionary of enrichment dataframes is
        # available, pass it along...
        if plus is not None:
            html = scaffold.render_main_page(portability, plus)
        else:
            html = scaffold.render_main_page(portability)

    
        # WRITE IT OUT    
        # --------------------------------------------------------------------------
        filename = os.path.join(savelocation, name+".html")

        # better to save it with the latin-1 charset because wiggly 
        # characters tend to sneak through annotations and they can be a pain...
        with codecs.open(filename, encoding='latin-1', mode="w") as f:
            f.write(html)

        reset_output()    # resets bokeh output state and avoids file ballooning when iterating
Example 44
def main():

    global arrowLeftImg;
    global arrowRightImg;
    global bucketDir;
    global perFuncDF;

    configSupplied = False;
    figuresForAllFunctions = [];

    # Set up the argument parser
    #
    parser = argparse.ArgumentParser(description=
                                 'Visualize operation log');
    parser.add_argument('files', type=str, nargs='*',
                        help='log files to process');
    parser.add_argument('-c', '--config', dest='configFile', default='');
    args = parser.parse_args();

    if (len(args.files) == 0):
        parser.print_help();
        sys.exit(1);

    # Get names of standard CSS colors that we will use for the legend
    initColorList();

    # Read the configuration file, if supplied.
    if (args.configFile != ''):
        configSupplied = parseConfigFile(args.configFile);

    if (not configSupplied):
        pluralSuffix = "";
        if (STDEV_MULT > 1):
            pluralSuffix = "s";
        print(color.BLUE + color.BOLD +
              "Will deem as outliers all function instances whose runtime " +
              "was " + str(STDEV_MULT) + " standard deviation" + pluralSuffix +
              " greater than the average runtime for that function."
              + color.END);


    # Create a directory for the files that display the data summarized
    # in each bucket of the outlier histogram. We call these "bucket files".
    #
    if not os.path.exists(bucketDir):
        os.makedirs(bucketDir);

    # Parallelize this later, so we are working on files in parallel.
    for fname in args.files:
        processFile(fname);

    # Normalize all intervals by subtracting the first timestamp.
    normalizeIntervalData();

    # Generate plots of time series slices across all files for each bucket
    # in the outlier histogram. Save each cross-file slice to an HTML file.
    #
    fileNameList = generateTSSlicesForBuckets();

    totalFuncs = len(perFuncDF.keys());
    i = 0;
    # Generate a histogram of outlier durations
    for func in sorted(perFuncDF.keys()):
        funcDF = perFuncDF[func];
        figure = createOutlierHistogramForFunction(func, funcDF, fileNameList);
        if (figure is not None):
            figuresForAllFunctions.append(figure);

        i += 1;
        percentComplete = float(i) / float(totalFuncs) * 100;
        print(color.BLUE + color.BOLD + " Generating outlier histograms... "),
        sys.stdout.write("%d%% complete  \r" % (percentComplete) );
        sys.stdout.flush();

    print(color.END);
    reset_output();
    output_file(filename = "WT-outliers.html", title="Outlier histograms");
    show(column(figuresForAllFunctions));
Example 45
def calculate_matrix(snplst,pop,request,r2_d="r2"):
	import json,math,operator,os,sqlite3,subprocess,sys

	# Set data directories
	data_dir="/local/content/ldlink/data/"
	gene_dir=data_dir+"refGene/sorted_refGene.txt.gz"
	snp_dir=data_dir+"snp142/snp142_annot_2.db"
	pop_dir=data_dir+"1000G/Phase3/samples/"
	vcf_dir=data_dir+"1000G/Phase3/genotypes/ALL.chr"
	tmp_dir="./tmp/"


	# Ensure tmp directory exists
	if not os.path.exists(tmp_dir):
		os.makedirs(tmp_dir)


	# Create JSON output
	out_json=open(tmp_dir+"matrix"+request+".json","w")
	output={}


	# Open SNP list file
	snps_raw=open(snplst).readlines()
	if len(snps_raw)>300:
		output["error"]="Maximum variant list is 300 RS numbers. Your list contains "+str(len(snps_raw))+" entries."
		json_output=json.dumps(output, sort_keys=True, indent=2)
		print >> out_json, json_output
		out_json.close()
		return("","")
		raise
	
	# Remove duplicate RS numbers
	snps=[]
	for snp_raw in snps_raw:
		snp=snp_raw.strip().split()
		if snp not in snps:
			snps.append(snp)


	# Select desired ancestral populations
	pops=pop.split("+")
	pop_dirs=[]
	for pop_i in pops:
		if pop_i in ["ALL","AFR","AMR","EAS","EUR","SAS","ACB","ASW","BEB","CDX","CEU","CHB","CHS","CLM","ESN","FIN","GBR","GIH","GWD","IBS","ITU","JPT","KHV","LWK","MSL","MXL","PEL","PJL","PUR","STU","TSI","YRI"]:
			pop_dirs.append(pop_dir+pop_i+".txt")
		else:
			output["error"]=pop_i+" is not an ancestral population. Choose one of the following ancestral populations: AFR, AMR, EAS, EUR, or SAS; or one of the following sub-populations: ACB, ASW, BEB, CDX, CEU, CHB, CHS, CLM, ESN, FIN, GBR, GIH, GWD, IBS, ITU, JPT, KHV, LWK, MSL, MXL, PEL, PJL, PUR, STU, TSI, or YRI."
			json_output=json.dumps(output, sort_keys=True, indent=2)
			print >> out_json, json_output
			out_json.close()
			return("","")
			raise
	
	get_pops="cat "+ " ".join(pop_dirs)
	proc=subprocess.Popen(get_pops, shell=True, stdout=subprocess.PIPE)
	pop_list=proc.stdout.readlines()
	
	ids=[i.strip() for i in pop_list]
	pop_ids=list(set(ids))


	# Connect to snp142 database
	conn=sqlite3.connect(snp_dir)
	conn.text_factory=str
	cur=conn.cursor()
	
	def get_coords(rs):
		id=rs.strip("rs")
		t=(id,)
		cur.execute("SELECT * FROM tbl_"+id[-1]+" WHERE id=?", t)
		return cur.fetchone()


	# Find RS numbers in snp142 database
	rs_nums=[]
	snp_pos=[]
	snp_coords=[]
	warn=[]
	tabix_coords=""
	for snp_i in snps:
		if len(snp_i)>0:
			if len(snp_i[0])>2:
				if snp_i[0][0:2]=="rs" and snp_i[0][-1].isdigit():
					snp_coord=get_coords(snp_i[0])
					if snp_coord!=None:
						rs_nums.append(snp_i[0])
						snp_pos.append(snp_coord[2])
						temp=[snp_i[0],snp_coord[1],snp_coord[2]]
						snp_coords.append(temp)
					else:
						warn.append(snp_i[0])
				else:
					warn.append(snp_i[0])
			else:
				warn.append(snp_i[0])
	
	# Close snp142 connection
	cur.close()
	conn.close()
	
	# Check RS numbers were found
	if warn!=[]:
		output["warning"]="The following RS numbers were not found in dbSNP 142: "+",".join(warn)
	
	if len(rs_nums)==0:
		output["error"]="Input variant list does not contain any valid RS numbers that are in dbSNP 142."
		json_output=json.dumps(output, sort_keys=True, indent=2)
		print >> out_json, json_output
		out_json.close()
		return("","")
		raise		


	# Check SNPs are all on the same chromosome
	for i in range(len(snp_coords)):
		if snp_coords[0][1]!=snp_coords[i][1]:
			output["error"]="Not all input variants are on the same chromosome: "+snp_coords[i-1][0]+"=chr"+str(snp_coords[i-1][1])+":"+str(snp_coords[i-1][2])+", "+snp_coords[i][0]+"=chr"+str(snp_coords[i][1])+":"+str(snp_coords[i][2])+"."
			json_output=json.dumps(output, sort_keys=True, indent=2)
			print >> out_json, json_output
			out_json.close()
			return("","")
			raise

	# Check max distance between SNPs
	distance_bp=[]
	for i in range(len(snp_coords)):
		distance_bp.append(int(snp_coords[i][2]))
	distance_max=max(distance_bp)-min(distance_bp)
	if distance_max>1000000:
		if "warning" in output:
			output["warning"]=output["warning"]+". Switch rate errors become more common as distance between query variants increases (Query range = "+str(distance_max)+" bp)"
		else:
			output["warning"]="Switch rate errors become more common as distance between query variants increases (Query range = "+str(distance_max)+" bp)"
				
	
	
	# Sort coordinates and make tabix formatted coordinates
	snp_pos_int=[int(i) for i in snp_pos]
	snp_pos_int.sort()
	snp_coord_str=[snp_coords[0][1]+":"+str(i)+"-"+str(i) for i in snp_pos_int]
	tabix_coords=" "+" ".join(snp_coord_str)
	

	# Extract 1000 Genomes phased genotypes
	vcf_file=vcf_dir+snp_coords[0][1]+".phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"
	tabix_snps="tabix -h {0}{1} | grep -v -e END".format(vcf_file, tabix_coords)
	proc=subprocess.Popen(tabix_snps, shell=True, stdout=subprocess.PIPE)
	
	# Define function to correct indel alleles
	def set_alleles(a1,a2):
		if len(a1)==1 and len(a2)==1:
			a1_n=a1
			a2_n=a2
		elif len(a1)==1 and len(a2)>1:
			a1_n="-"
			a2_n=a2[1:]
		elif len(a1)>1 and len(a2)==1:
			a1_n=a1[1:]
			a2_n="-"
		elif len(a1)>1 and len(a2)>1:
			a1_n=a1[1:]
			a2_n=a2[1:]
		return(a1_n,a2_n)
	
	
	# Import SNP VCF files
	vcf=proc.stdout.readlines()
	
	# Make sure there are genotype data in VCF file
	if vcf[-1][0:6]=="#CHROM":
		output["error"]="No query variants were found in 1000G VCF file"
		json_output=json.dumps(output, sort_keys=True, indent=2)
		print >> out_json, json_output
		out_json.close()
		return("","")
		raise
	
	h=0
	while vcf[h][0:2]=="##":
		h+=1

	head=vcf[h].strip().split()

	# Extract haplotypes
	index=[]
	for i in range(9,len(head)):
		if head[i] in pop_ids:
			index.append(i)

	hap1=[[]]
	for i in range(len(index)-1):
		hap1.append([])
	hap2=[[]]
	for i in range(len(index)-1):
		hap2.append([])
	
	rsnum_lst=[]
	allele_lst=[]
	pos_lst=[]
	for g in range(h+1,len(vcf)):
		geno=vcf[g].strip().split()
		if "," not in geno[3] and "," not in geno[4]:
			a1,a2=set_alleles(geno[3],geno[4])
			for i in range(len(index)):
				if geno[index[i]]=="0|0":
					hap1[i].append(a1)
					hap2[i].append(a1)
				elif geno[index[i]]=="0|1":
					hap1[i].append(a1)
					hap2[i].append(a2)
				elif geno[index[i]]=="1|0":
					hap1[i].append(a2)
					hap2[i].append(a1)
				elif geno[index[i]]=="1|1":
					hap1[i].append(a2)
					hap2[i].append(a2)
				elif geno[index[i]]=="0":
					hap1[i].append(a1)
					hap2[i].append(".")
				elif geno[index[i]]=="1":
					hap1[i].append(a2)
					hap2[i].append(".")
				else:
					hap1[i].append(".")
					hap2[i].append(".")

			if geno[1] in snp_pos:
				rs_query=rs_nums[snp_pos.index(geno[1])]
				rs_1000g=geno[2]
				if rs_query==rs_1000g:
					rsnum=rs_1000g
				else:
					rsnum=rs_1000g
					if "warning" in output:
						output["warning"]=output["warning"]+". Genomic position for query variant ("+rs_query+") does not match RS number at 1000G position ("+rs_1000g+")"
					else:
						output["warning"]="Genomic position for query variant ("+rs_query+") does not match RS number at 1000G position ("+rs_1000g+")"
					
			else:
				rsnum=geno[2]
				if "warning" in output:
					output["warning"]=output["warning"]+". Genomic position ("+geno[1]+") in VCF file does not match db142 search coordinates for query variant"
				else:
					output["warning"]="Genomic position ("+geno[1]+") in VCF file does not match db142 search coordinates for query variant"
			
			rsnum_lst.append(rsnum)

			position="chr"+geno[0]+":"+geno[1]+"-"+geno[1]
			pos_lst.append(position)
			alleles=a1+"/"+a2
			allele_lst.append(alleles)
	
	# Calculate Pairwise LD Statistics
	all_haps=hap1+hap2
	ld_matrix=[[[None for v in range(2)] for i in range(len(all_haps[0]))] for j in range(len(all_haps[0]))]

	for i in range(len(all_haps[0])):
		for j in range(i,len(all_haps[0])):
			hap={}
			for k in range(len(all_haps)):
				# Extract haplotypes
				hap_k=all_haps[k][i]+all_haps[k][j]
				if hap_k in hap:
					hap[hap_k]+=1
				else:
					hap[hap_k]=1
			
			# Remove Missing Haplotypes
			keys=hap.keys()
			for key in keys:
				if "." in key:
					hap.pop(key, None)

			# Check all haplotypes are present
			if len(hap)!=4:
				snp_i_a=allele_lst[i].split("/")
				snp_j_a=allele_lst[j].split("/")
				haps=[snp_i_a[0]+snp_j_a[0],snp_i_a[0]+snp_j_a[1],snp_i_a[1]+snp_j_a[0],snp_i_a[1]+snp_j_a[1]]
				for h in haps:
					if h not in hap:
						hap[h]=0
			
			# Perform LD calculations
			A=hap[sorted(hap)[0]]
			B=hap[sorted(hap)[1]]
			C=hap[sorted(hap)[2]]
			D=hap[sorted(hap)[3]]
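			# delta is the cross-product difference of the 2x2 haplotype count
			# table; Ms is the product of its four marginal totals.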
			delta=float(A*D-B*C)
			Ms=float((A+C)*(B+D)*(A+B)*(C+D))
			if Ms!=0:
				# D prime
				if delta<0:
					D_prime=round(abs(delta/min((A+C)*(A+B),(B+D)*(C+D))),3)
				else:
					D_prime=round(abs(delta/min((A+C)*(C+D),(A+B)*(B+D))),3)

				# R2
				r2=round((delta**2)/Ms,3)

				# Find Correlated Alleles
				if r2>0.1:
					N=A+B+C+D
					# Expected Cell Counts
					eA=(A+B)*(A+C)/N
					eB=(B+A)*(B+D)/N
					eC=(C+A)*(C+D)/N
					eD=(D+C)*(D+B)/N

					# Calculate Deltas
					dA=(A-eA)**2
					dB=(B-eB)**2
					dC=(C-eC)**2
					dD=(D-eD)**2
					dmax=max(dA,dB,dC,dD)

					if dmax==dA or dmax==dD:
						match=sorted(hap)[0][0]+"="+sorted(hap)[0][1]+","+sorted(hap)[2][0]+"="+sorted(hap)[1][1]
					else:
						match=sorted(hap)[0][0]+"="+sorted(hap)[1][1]+","+sorted(hap)[2][0]+"="+sorted(hap)[0][1]
				else:
					match="  =  ,  =  "
			else:
				D_prime="NA"
				r2="NA"
				match="  =  ,  =  "

			snp1=rsnum_lst[i]
			snp2=rsnum_lst[j]
			pos1=pos_lst[i].split("-")[0]
			pos2=pos_lst[j].split("-")[0]
			allele1=allele_lst[i]
			allele2=allele_lst[j]
			corr=match.split(",")[0].split("=")[1]+"="+match.split(",")[0].split("=")[0]+","+match.split(",")[1].split("=")[1]+"="+match.split(",")[1].split("=")[0]
			corr_f=match

			
			ld_matrix[i][j]=[snp1,snp2,allele1,allele2,corr,pos1,pos2,D_prime,r2]
			ld_matrix[j][i]=[snp2,snp1,allele2,allele1,corr_f,pos2,pos1,D_prime,r2]


	# Generate D' and R2 output matrices
	d_out=open(tmp_dir+"d_prime_"+request+".txt", "w")
	r_out=open(tmp_dir+"r2_"+request+".txt", "w")

	print >> d_out, "RS_number"+"\t"+"\t".join(rsnum_lst)
	print >> r_out, "RS_number"+"\t"+"\t".join(rsnum_lst)

	dim=len(ld_matrix)
	for i in range(dim):
		temp_d=[rsnum_lst[i]]
		temp_r=[rsnum_lst[i]]
		for j in range(dim):
			temp_d.append(str(ld_matrix[i][j][7]))
			temp_r.append(str(ld_matrix[i][j][8]))
		print >> d_out, "\t".join(temp_d)
		print >> r_out, "\t".join(temp_r)


	# Generate Plot Variables
	out=[j for i in ld_matrix for j in i]
	xnames=[]
	ynames=[]
	xA=[]
	yA=[]
	corA=[]
	xpos=[]
	ypos=[]
	D=[]
	R=[]
	box_color=[]
	box_trans=[]
	
	if r2_d not in ["r2","d"]:
		if "warning" in output:
			output["warning"]=output["warning"]+". "+r2_d+" is not an acceptable value for r2_d (r2 or d required). r2 is used by default"
		else:
			output["warning"]=r2_d+" is not an acceptable value for r2_d (r2 or d required). r2 is used by default"
		r2_d="r2"

	for i in range(len(out)):
		snp1,snp2,allele1,allele2,corr,pos1,pos2,D_prime,r2=out[i]
		xnames.append(snp1)
		ynames.append(snp2)
		xA.append(allele1)
		yA.append(allele2)
		corA.append(corr)
		xpos.append(pos1)
		ypos.append(pos2)
		if r2_d=="r2" and r2!="NA":
			D.append(str(round(float(D_prime),4)))
			R.append(str(round(float(r2),4)))
			box_color.append("red")
			box_trans.append(r2)
		elif r2_d=="d" and D_prime!="NA":
			D.append(str(round(float(D_prime),4)))
			R.append(str(round(float(r2),4)))
			box_color.append("red")
			box_trans.append(abs(D_prime))
		else:
			D.append("NA")
			R.append("NA")
			box_color.append("blue")
			box_trans.append(0.1)
	
	# Import plotting modules
	from collections import OrderedDict
	from bokeh.embed import components,file_html
	from bokeh.models import HoverTool,LinearAxis,Range1d
	from bokeh.plotting import ColumnDataSource,curdoc,figure,output_file,reset_output,save
	from bokeh.resources import CDN	
	from math import pi
	
	reset_output()
		
	# Aggregate Plotting Data
	x=[]
	y=[]
	w=[]
	h=[]
	coord_snps_plot=[]
	snp_id_plot=[]
	alleles_snp_plot=[]
	for i in range(0,len(xpos),int(len(xpos)**0.5)):
		x.append(int(xpos[i].split(":")[1])/1000000.0)
		y.append(0.5)
		w.append(0.00003)
		h.append(1.06)
		coord_snps_plot.append(xpos[i])
		snp_id_plot.append(xnames[i])
		alleles_snp_plot.append(xA[i])
	
	
	# Generate error if less than two SNPs
	if len(x)<2:
		output["error"]="Less than two variants to plot."
		json_output=json.dumps(output, sort_keys=True, indent=2)
		print >> out_json, json_output
		out_json.close()
		return("","")
		raise
	
	source2=ColumnDataSource(
		data=dict(
			x=x,
			y=y,
			w=w,
			h=h,
			coord_snps_plot=coord_snps_plot,
			snp_id_plot=snp_id_plot,
			alleles_snp_plot=alleles_snp_plot,
		)
	)
	
	buffer=(x[-1]-x[0])*0.025
	xr=Range1d(start=x[0]-buffer, end=x[-1]+buffer)
	yr=Range1d(start=-0.03, end=1.03)
	y2_ll=[-0.03]*len(x)
	y2_ul=[1.03]*len(x)
	
	yr_pos=Range1d(start=(x[-1]+buffer)*-1, end=(x[0]-buffer)*-1)
	yr0=Range1d(start=0, end=1)
	yr2=Range1d(start=0, end=3.8)
	yr3=Range1d(start=0, end=1)
	
	spacing=(x[-1]-x[0]+buffer+buffer)/(len(x)*1.0)
	x2=[]
	y0=[]
	y1=[]
	y2=[]
	y3=[]
	y4=[]
	for i in range(len(x)):
		x2.append(x[0]-buffer+spacing*(i+0.5))
		y0.append(0)
		y1.append(0.20)
		y2.append(0.80)
		y3.append(1)
		y4.append(1.15)
	
	xname_pos=[]
	for i in x2:
		for j in range(len(x2)):
			xname_pos.append(i)
	
	# Matrix Plot
	source = ColumnDataSource(
		data=dict(
			xname=xnames,
			xname_pos=xname_pos,
			yname=ynames,
			xA=xA,
			yA=yA,
			xpos=xpos,
			ypos=ypos,
			R2=R,
			Dp=D,
			corA=corA,
			box_color=box_color,
			box_trans=box_trans,
		)
	)
	
	threshold=70
	if len(snps)<threshold:
		matrix_plot=figure(outline_line_color="white", min_border_top=0, min_border_bottom=2, min_border_left=100, min_border_right=5, 
	       x_range=xr, y_range=list(reversed(rsnum_lst)),
	       h_symmetry=False, v_symmetry=False, border_fill='white', x_axis_type=None, logo=None,
		   tools="hover,reset,pan,box_zoom,previewsave", title=" ", plot_width=800, plot_height=700)
	
	else:
		matrix_plot=figure(outline_line_color="white", min_border_top=0, min_border_bottom=2, min_border_left=100, min_border_right=5, 
	       x_range=xr, y_range=list(reversed(rsnum_lst)),
		   h_symmetry=False, v_symmetry=False, border_fill='white', x_axis_type=None, y_axis_type=None, logo=None,
		   tools="hover,reset,pan,box_zoom,previewsave", title=" ", plot_width=800, plot_height=700)
	
	matrix_plot.rect('xname_pos', 'yname', 0.95*spacing, 0.95, source=source,
		 color="box_color", alpha="box_trans", line_color=None)
	
	matrix_plot.grid.grid_line_color=None
	matrix_plot.axis.axis_line_color=None
	matrix_plot.axis.major_tick_line_color=None
	if len(snps)<threshold:
		matrix_plot.axis.major_label_text_font_size="8pt"
		matrix_plot.xaxis.major_label_orientation="vertical"
	
	matrix_plot.axis.major_label_text_font_style="normal"
	matrix_plot.xaxis.major_label_standoff=0
	
	
	sup_2=u"\u00B2"
	
	hover=matrix_plot.select(dict(type=HoverTool))
	hover.tooltips=OrderedDict([
		("SNP 1", " "+"@yname (@yA)"),
		("SNP 2", " "+"@xname (@xA)"),
		("D\'", " "+"@Dp"),
		("R"+sup_2, " "+"@R2"),
		("Correlated Alleles", " "+"@corA"),
	])
	
	
	
	# Connecting and Rug Plots
	# Connector Plot
	if len(snps)<threshold:
		connector=figure(outline_line_color="white", y_axis_type=None, x_axis_type=None,
			x_range=xr, y_range=yr2, border_fill='white',
			title="", min_border_left=100, min_border_right=5, min_border_top=0, min_border_bottom=0, h_symmetry=False, v_symmetry=False,
			plot_width=800, plot_height=90, tools="xpan,tap")
		connector.segment(x, y0, x, y1, color="black")
		connector.segment(x, y1, x2, y2, color="black")
		connector.segment(x2, y2, x2, y3, color="black")
		connector.text(x2,y4,text=snp_id_plot,alpha=1, angle=pi/2, text_font_size="8pt",text_baseline="middle", text_align="left")
	else:
		connector=figure(outline_line_color="white", y_axis_type=None, x_axis_type=None,
			x_range=xr, y_range=yr3, border_fill='white',
			title="", min_border_left=100, min_border_right=5, min_border_top=0, min_border_bottom=0, h_symmetry=False, v_symmetry=False,
			plot_width=800, plot_height=30, tools="xpan,tap")
		connector.segment(x, y0, x, y1, color="black")
		connector.segment(x, y1, x2, y2, color="black")
		connector.segment(x2, y2, x2, y3, color="black")

	
	
	connector.yaxis.major_label_text_color=None
	connector.yaxis.minor_tick_line_alpha=0  ## Option does not work
	connector.yaxis.axis_label=" "
	connector.grid.grid_line_color=None
	connector.axis.axis_line_color=None
	connector.axis.major_tick_line_color=None
	connector.axis.minor_tick_line_color=None

	connector.toolbar_location=None
	
	# Rug Plot
	rug=figure(x_range=xr, y_range=yr, y_axis_type=None,
        title="", min_border_top=1, min_border_bottom=0, min_border_left=100, min_border_right=5, h_symmetry=False, v_symmetry=False,
        plot_width=800, plot_height=50, tools="hover,xpan,tap")
	rug.rect(x, y, w, h, source=source2, fill_color="red", dilate=True, line_color=None, fill_alpha=0.6)
	
	hover=rug.select(dict(type=HoverTool))
	hover.tooltips=OrderedDict([
		("SNP", "@snp_id_plot (@alleles_snp_plot)"),
		("Coord", "@coord_snps_plot"),
	])

	rug.toolbar_location=None
	
	
		
	# Gene Plot
	tabix_gene="tabix -fh {0} {1}:{2}-{3} > {4}".format(gene_dir, snp_coords[1][1], int((x[0]-buffer)*1000000), int((x[-1]+buffer)*1000000), tmp_dir+"genes_"+request+".txt")
	subprocess.call(tabix_gene, shell=True)
	filename=tmp_dir+"genes_"+request+".txt"
	genes_raw=open(filename).readlines()
	
	genes_plot_start=[]
	genes_plot_end=[]
	genes_plot_y=[]
	genes_plot_name=[]
	exons_plot_x=[]
	exons_plot_y=[]
	exons_plot_w=[]
	exons_plot_h=[]
	exons_plot_name=[]
	exons_plot_id=[]
	exons_plot_exon=[]
	lines=[0]
	gap=80000
	tall=0.75
	if genes_raw!=None:
		for i in range(len(genes_raw)):
			bin,name_id,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds,score,name2,cdsStartStat,cdsEndStat,exonFrames=genes_raw[i].strip().split()
			name=name2
			id=name_id
			e_start=exonStarts.split(",")
			e_end=exonEnds.split(",")
			
			# Determine Y Coordinate
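			# Greedy first-fit packing: place the transcript on the first row
			# whose last-placed gene ends at least `gap` bp before txStart;
			# if no existing row fits, open a new one.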
			i=0
			y_coord=None
			while y_coord==None:
				if i>len(lines)-1:
					y_coord=i+1
					lines.append(int(txEnd))
				elif int(txStart)>(gap+lines[i]):
					y_coord=i+1
					lines[i]=int(txEnd)
				else:
					i+=1
			
			genes_plot_start.append(int(txStart)/1000000.0)
			genes_plot_end.append(int(txEnd)/1000000.0)
			genes_plot_y.append(y_coord)
			genes_plot_name.append(name+"  ")
			
			for i in range(len(e_start)-1):
				if strand=="+":
					exon=i+1
				else:
					exon=len(e_start)-1-i
				
				width=(int(e_end[i])-int(e_start[i]))/1000000.0
				x_coord=int(e_start[i])/1000000.0+(width/2)
				
				exons_plot_x.append(x_coord)
				exons_plot_y.append(y_coord)
				exons_plot_w.append(width)
				exons_plot_h.append(tall)
				exons_plot_name.append(name)
				exons_plot_id.append(id)
				exons_plot_exon.append(exon)


	n_rows=len(lines)
	genes_plot_yn=[n_rows-w+0.5 for w in genes_plot_y]
	exons_plot_yn=[n_rows-w+0.5 for w in exons_plot_y]
	yr2=Range1d(start=0, end=n_rows)
	
	source2=ColumnDataSource(
		data=dict(
			exons_plot_name=exons_plot_name,
			exons_plot_id=exons_plot_id,
			exons_plot_exon=exons_plot_exon,
		)
	)
	
	max_genes=40
	if len(lines)<3 or len(genes_raw)>max_genes:
		plot_h_pix=150
	else:
		plot_h_pix=150+(len(lines)-2)*50
	
	gene_plot=figure(min_border_top=2, min_border_bottom=0, min_border_left=100, min_border_right=5,
        x_range=xr, y_range=yr2, border_fill='white',
        title="", h_symmetry=False, v_symmetry=False, logo=None,
        plot_width=800, plot_height=plot_h_pix, tools="hover,xpan,box_zoom,wheel_zoom,tap,reset,previewsave")
	
	if len(genes_raw)<=max_genes:
		gene_plot.segment(genes_plot_start, genes_plot_yn, genes_plot_end, genes_plot_yn, color="black", alpha=1, line_width=2)
		gene_plot.rect(exons_plot_x, exons_plot_yn, exons_plot_w, exons_plot_h, source=source2, fill_color="grey", line_color="grey")
		gene_plot.text(genes_plot_start, genes_plot_yn, text=genes_plot_name, alpha=1, text_font_size="7pt", text_font_style="bold", text_baseline="middle", text_align="right", angle=0)
		hover=gene_plot.select(dict(type=HoverTool))
		hover.tooltips=OrderedDict([
		("Gene", "@exons_plot_name"),
		("ID", "@exons_plot_id"),
		("Exon", "@exons_plot_exon"),
		])
	
	else:
		x_coord_text=x[0]+(x[-1]-x[0])/2.0
		gene_plot.text(x_coord_text, n_rows/2.0, text="Too many genes to plot.", alpha=1, text_font_size="12pt", text_font_style="bold", text_baseline="middle", text_align="center", angle=0)
	
	gene_plot.xaxis.axis_label="Chromosome "+snp_coords[1][1]+" Coordinate (Mb)(GRCh37)"
	gene_plot.yaxis.axis_label="Genes"
	gene_plot.ygrid.grid_line_color=None
	gene_plot.yaxis.axis_line_color=None
	gene_plot.yaxis.minor_tick_line_color=None
	gene_plot.yaxis.major_tick_line_color=None
	gene_plot.yaxis.major_label_text_color=None
	
	hover=gene_plot.select(dict(type=HoverTool))
	hover.tooltips=OrderedDict([
		("Gene", "@exons_plot_name"),
		("ID", "@exons_plot_id"),
		("Exon", "@exons_plot_exon"),
	])
	

	
	gene_plot.toolbar_location="below"
	
	
	
	#html=file_html(curdoc(), CDN, "Test Plot")
	#out_html=open("LDmatrix.html","w")
	#print >> out_html, html
	#out_html.close()
	
	out_script,out_div=components(curdoc(), CDN)
	reset_output()

	# Return output
	json_output=json.dumps(output, sort_keys=True, indent=2)
	print >> out_json, json_output
	out_json.close()
	return(out_script,out_div)
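The pairwise statistics above reduce to arithmetic on the four haplotype counts; pulled out as a self-contained sketch (cell ordering follows the sorted-key convention used in the function):

def ld_stats(A, B, C, D):
    """D' and r2 from the four cell counts of a 2x2 haplotype table."""
    delta = float(A * D - B * C)                       # cross-product difference
    Ms = float((A + C) * (B + D) * (A + B) * (C + D))  # product of the margins
    if Ms == 0:
        return "NA", "NA"
    if delta < 0:
        D_prime = abs(delta / min((A + C) * (A + B), (B + D) * (C + D)))
    else:
        D_prime = abs(delta / min((A + C) * (C + D), (A + B) * (B + D)))
    return round(D_prime, 3), round(delta ** 2 / Ms, 3)

print(ld_stats(40, 10, 10, 40))  # -> (0.6, 0.36)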
Example 46
	def plotzipcomplaints(self,mapPoints,dat):
		"""Draw a circle for each zip code in NYC; the size of the circle is
		proportional to the number of complaints in that zip code."""
		reset_output()
		
		numberOfComplaints = []
		polygons = {'lat_list':[],'lng_list':[],'radius_list':[]}
		X = []
		Y = []
		zipCodes = []
		record_index = 0
		for r in dat.iterRecords():
			currentZip = r[0]
			intzip = int(currentZip)

		# Keeps only zip codes in NY area.
			if intzip in self.zipBoroughdata:
				zipCodes.append(intzip)
			# Gets shape for this zip.
				shape = dat.shapeRecord(record_index).shape
				points = shape.points

			# Breaks into lists for lat/lng.
				lngs = [p[0] for p in points]
				lats = [p[1] for p in points]

			# Stores lat/lng for current zip shape.
				polygons['lng_list'].append(lngs)
				polygons['lat_list'].append(lats)

				zip_box = shape.bbox
				lng_avg = (zip_box[0]+zip_box[2])/2
				lat_avg = (zip_box[1]+zip_box[3])/2

				X.append(lng_avg)
				Y.append(lat_avg)

			# Calculate ratio of number of complaints
				if currentZip in mapPoints['zip_complaints']:
					numberOfComplaints.append(mapPoints['zip_complaints'][currentZip])

			record_index += 1
		maxNumComplaints = max(numberOfComplaints)
		minNumComplaints = min(numberOfComplaints)
		sortedlist=[]

		for i in sorted(numberOfComplaints):
			sortedlist.append(i)

		for i in numberOfComplaints:
			polygons['radius_list'].append(i/(maxNumComplaints*float(100)))

		# Creates the plot.
		output_file("plotZipComplaints.html", title="ZipComplaints")
		TOOLS="pan,wheel_zoom,box_zoom,reset,previewsave"

	# Creates the polygons.
		patches(polygons['lng_list'], polygons['lat_list'], \
				fill_color = 'white', line_color="gray", \
				tools=TOOLS, plot_width=1100, plot_height=700, \
				title="Radius of circle according to the Number of Complaints in the Zip Code")
		hold()

		scatter(X,Y, fill_color='red',color='red', radius = polygons['radius_list'], alpha = 0.6, tools=TOOLS)

		show()
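Example 46 relies on the early module-level bokeh.plotting interface (patches, hold, scatter and show as free functions), which later Bokeh releases removed in favour of methods on a figure. A rough modern equivalent, with placeholder geometry standing in for the shapefile-derived lists:

from bokeh.plotting import figure, output_file, show

output_file("plotZipComplaints.html", title="ZipComplaints")

p = figure(width=1100, height=700,
           tools="pan,wheel_zoom,box_zoom,reset,save",
           title="Circle radius proportional to complaints per zip code")

# Placeholder geometry; the real code would pass the polygon and centroid
# lists built from the shapefile records.
p.patches([[0.0, 1.0, 1.0]], [[0.0, 0.0, 1.0]], fill_color="white", line_color="gray")
p.circle([0.5], [0.4], radius=0.05, fill_color="red", alpha=0.6)

show(p)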
Example 47
        if len(catalog[entry]['sources']):
            html = re.sub(r'(\<\/body\>)', r'<em>Sources of data:</em><br><table><tr><th width=30px>ID</th><th>Source</th></tr>\n\1', html)
            for source in catalog[entry]['sources']:
                html = re.sub(r'(\<\/body\>)', r'<tr><td>' + source['alias'] +
                    r'</td><td>' + (('<a href="' + source['url'] + '">') if 'url' in source else '') +
                    source['name'].encode('ascii', 'xmlcharrefreplace').decode("utf-8") +
                    (r'</a>' if 'url' in source else '') +
                    r'</td></tr>\n\1', html)
            html = re.sub(r'(\<\/body\>)', r'</table>\n\1', html)
        html = re.sub(r'(\<\/body\>)', returnlink+r'\n\1', html)
        print(outdir + eventname + ".html")
        with open(outdir + eventname + ".html", "w") as fff:
            fff.write(html)

    # Necessary to clear Bokeh state
    reset_output()

    #if spectraavail and dohtml:
    #    sys.exit()

    #if fcnt > 100:
    #    sys.exit()

    # Save this stuff because next line will delete it.
    if args.writecatalog:
        if 'photoplot' in catalog[entry]:
            tdepages.append(catalog[entry]['aliases'] + ['https://tde.space/' + catalog[entry]['photoplot']])

        if 'sources' in catalog[entry]:
            for sourcerow in catalog[entry]['sources']:
                strippedname = re.sub('<[^<]+?>', '', sourcerow['name'].encode('ascii','xmlcharrefreplace').decode("utf-8"))
Example 48
	def comparetowagencies(self,mapPoints,dat):
		"""Create a map of NYC, analogous to the one above, comparing two
		agencies by the number of complaints in each zip code."""
		reset_output()
		polygons = {'lat_list':[],'lng_list':[],'color_list':[]}# creates a dict for zip
		# color = ['#deebf7', '#c6dbef', '#9ecae1', '#6baed6', '#4292c6', '#2171b5', '#08519c', '#08306b']
		color = ["#F1EEF6", "#D4B9DA", "#C994C7", "#DF65B0", "#DD1C77", "#980043"]
		ratios = []
		trueratios = []
		ratio_colors = []
		agency_names = []
		zipCodes = []
		record_index = 0
		for r in dat.iterRecords():
			currentZip = r[0]
			intzip = int(currentZip)
			if intzip in self.zipBoroughdata:
				zipCodes.append(intzip)
				shape = dat.shapeRecord(record_index).shape
				points = shape.points
				lngs = [p[0] for p in points ]
				lats = [p[1] for p in points ]
				polygons['lng_list'].append(lngs)
				polygons['lat_list'].append(lats)
			####calculate ratio of number of complaints
				if currentZip in mapPoints['zip_complaints']:
					sortedlist = sorted(mapPoints['zip_complaints'][currentZip].items(),key = operator.itemgetter(0))
					if (sortedlist[0][1]+sortedlist[1][1]) == 0:
						ratios.append('NA')
					else:
						# trueratio = (float(sortedlist[0][1])/(sortedlist[0][1]+sortedlist[1][1]))*len(color)-1
						calculate_each_ratio = int(floor((float(sortedlist[0][1])/(sortedlist[0][1]+sortedlist[1][1]))*(len(color)-1)))
						ratios.append(calculate_each_ratio)
						# trueratios.append(trueratio)
					# Append one colour per newly computed ratio so that
					# color_list stays aligned with the polygon lists.
					latest=ratios[-1]
					if latest=='NA':
						polygons['color_list'].append('white')
					else:
						polygons['color_list'].append(color[int(latest)])
			record_index += 1
		agency_names.append(sortedlist[1][0])
		agency_names.append(sortedlist[0][0])
		output_file("CompareTwoAgencies.html", title="CompareTwoAgencies")
		TOOLS="pan,wheel_zoom,box_zoom,reset,previewsave,hover"
		source = ColumnDataSource(
			data=dict(
			ratios = trueratios,
			zipCodes = zipCodes
				)
			)

	# Creates the polygons.
		patches(polygons['lng_list'], polygons['lat_list'], \
				fill_color=polygons['color_list'], line_color="gray", \
				tools=TOOLS, plot_width=1100, plot_height=700, \
				title="Ratio of Number of Complaints of selected agencies according to Zip Code",
				source = source)
		hover = curplot().select(dict(type=HoverTool))
		hover.tooltips = OrderedDict([("Zip Code", "@zipCodes")])
 
		
		hold()
		x, y1 = -74.2, 40.77
		y2 = 40.765

		ratio_values = ['100% ', '100% ']

		for i, agency in enumerate(color):
			rect([x+0.01], [y1], color=color[i], width=0.01, height=.02)
			y1 = y1 + .01

		for i, agency in enumerate(agency_names):
			text([x], [y2], text=ratio_values[i] + agency, angle=0, text_font_size="8pt", font_weight = 'bold', text_align="right", text_baseline="middle")
			y2 = y2 + 0.08

		show()