Ejemplo n.º 1
0
def line_chart(df_melted, y_axis, title):
    """Build an interactive layered line chart of ``y_axis`` against ``Year``.

    Three layers are combined: the line itself, a set of fully transparent
    points that carry the nearest-on-mouseover selection, and visible points
    that only show up for the currently selected ``Year``.

    ``color`` and ``forecast_period`` are read from the enclosing scope.

    :param df_melted: data passed to ``alt.Chart``; its index is used as the
        x sort order.
    :param y_axis: encoding shorthand used for the y channel and the tooltip.
    :param title: chart title.
    :return: the layered, interactive chart.
    """
    # Single-value selection that snaps to the nearest Year under the mouse.
    hover = alt.selection(type='single',
                          nearest=True,
                          on='mouseover',
                          fields=['Year'],
                          empty='none')

    # Invisible points: they only exist to host the selection.
    hover_targets = alt.Chart(df_melted).mark_point().encode(
        x='Year:Q',
        opacity=alt.value(0),
    )
    hover_targets = hover_targets.add_selection(hover)

    trend = alt.Chart(df_melted).mark_line(color=color).encode(
        x=alt.X('Year:Q',
                axis=alt.Axis(tickCount=forecast_period),
                sort=list(df_melted.index)),
        y=alt.Y(y_axis),
        tooltip=[alt.Tooltip(y_axis, format=',.0%')],
    )

    # Points are opaque only where the selection matches.
    shown_when_hovered = alt.condition(hover, alt.value(1), alt.value(0))
    markers = trend.mark_point(color=color,
                               size=40).encode(opacity=shown_when_hovered)

    layered = alt.layer(trend, hover_targets, markers)
    sized = layered.properties(title=title, width=alt.Step(60), height=400)
    return sized.interactive()
Ejemplo n.º 2
0
    def get_divergence_chart(self):
        """
        Build the horizontal bar chart of each roll's ``Difference`` value
        together with a one-row table of roll counts.

        Bar colours are taken from a mirrored 51-step ``Reds_r`` ramp, indexed
        by the min-max normalised ``Difference`` column of
        ``self.history_count`` (NaN after normalisation is treated as 0).

        Returns a tuple ``(diff_chart, roll_count)``.
        """
        def min_max_scale(series):
            return (series - series.min()) / (series.max() - series.min())

        # Normalised differences, with NaN replaced by 0.
        scaled = min_max_scale(self.history_count["Difference"])
        scaled = [0 if np.isnan(value) else value for value in scaled]

        # 51 colours followed by the same 51 reversed: 102 entries in total.
        ramp = [cm.get_cmap('Reds_r', 51)(step) for step in range(51)]
        palette = ramp + ramp[::-1]
        roll_colors = [
            colors.to_hex(palette[int(100 * value)]) for value in scaled
        ]

        # Altair horizontal bar chart, assembled step by step.
        plt_df = self.history_count.round(2)
        roll_scale = alt.Scale(domain=self.history_count.Roll.to_list(),
                               range=roll_colors)
        bars = alt.Chart(plt_df).mark_bar(size=30, strokeWidth=3,
                                          stroke="black")
        bars = bars.encode(
            y='Roll:O',
            x=alt.X('Difference:Q', scale=alt.Scale(padding=25)),
            color=alt.Color('Roll:O', legend=None, scale=roll_scale),
            tooltip=list(plt_df.columns),
        )
        bars = bars.properties(
            title="Roll Differential from Expected Count",
            width=self.screen_width / 4,
            height=alt.Step(32),
        )
        bars = bars.configure_title(
            fontSize=32, dy=-50, limit=600, font="Arial", align="center",
            anchor="middle",
        )
        diff_chart = bars.configure_axis(labelFontSize=14, titleFontSize=16)

        # Counts as a single row with one column per roll.
        roll_count = self.history_count.set_index("Roll")[["Count"]].T

        return diff_chart, roll_count
Ejemplo n.º 3
0
 def altair_chart(df, vertical_axis, coloris, titre):
     """Draw a bar chart of ``vertical_axis`` per ``Region`` in Streamlit.

     Bars are sorted by descending y value, filled with the fixed colour
     ``coloris`` and titled ``titre``. Nothing is returned.
     """
     bars = alt.Chart(df).mark_bar(size=25)
     bars = bars.encode(
         x=alt.X('Region', sort='-y'),
         y=vertical_axis,
         color=alt.value(coloris),
     )
     hist = bars.properties(width=alt.Step(50), height=400,
                            title=titre).interactive()
     st.altair_chart(hist, use_container_width=True)
Ejemplo n.º 4
0
def bar_chart(df_melted, y_axis, title):
    """Build an interactive bar chart of ``y_axis`` against ``Year``.

    ``color`` and ``forecast_period`` are read from the enclosing scope.

    :param df_melted: data passed to ``alt.Chart``; its index is used as the
        x sort order.
    :param y_axis: encoding shorthand used for the y channel and the tooltip.
    :param title: chart title.
    :return: the interactive chart.
    """
    bars = alt.Chart(df_melted).mark_bar(color=color, size=40)
    bars = bars.encode(
        x=alt.X('Year:Q',
                axis=alt.Axis(tickCount=forecast_period),
                sort=list(df_melted.index)),
        y=alt.Y(y_axis),
        tooltip=[alt.Tooltip(y_axis, format=',.0f')],
    )
    bars = bars.properties(title=title, width=alt.Step(60), height=400)
    return bars.interactive()
Ejemplo n.º 5
0
def huabar(a, b):
    """Write a colour-graded bar chart of column ``b`` per ``a`` to Streamlit.

    The chart reads the module-level ``data``. The x order is left unsorted
    and bars are coloured by ``b`` on a "redyellowgreen" scheme whose domain
    is (100, -100).
    """
    gradient = alt.Color(
        b, scale=alt.Scale(domain=(100, -100), scheme="redyellowgreen"))
    bars = alt.Chart(data).mark_bar(size=30)
    bars = bars.encode(x=alt.X(a, sort=None), y=b, color=gradient)
    st.write(bars.properties(width=alt.Step(40), height=600))
def bar_plot(xcol, ycol):
    """Return an HTML bar chart counting rows per ``xcol`` value.

    Only the rows of the module-level ``df`` whose
    "Predicted Subscription (current)" column equals ``ycol`` are charted.

    :param xcol: column used for the x axis, its title and the bar colour.
    :param ycol: value to match in "Predicted Subscription (current)".
    :return: the chart serialised with ``chart.to_html()``.
    """
    # Lift Altair's row limit (a global setting) before charting.
    alt.data_transformers.disable_max_rows()

    subset = df[df["Predicted Subscription (current)"] == ycol]
    chart = alt.Chart(subset).mark_bar().encode(
        alt.X(xcol, title=xcol),
        y="count(Type of Job)",
        color=alt.Color(xcol, legend=None),
    ).properties(width=alt.Step(50)).configure_axis(
        labelFontSize=20,
        titleFontSize=25,
    )
    return chart.to_html()
Ejemplo n.º 7
0
 def make_prop_chart(dimension, width_step):
     """Build the green-circle chart of ``prop`` values against ``dimension``.

     The data is referenced by the base name of ``props_data_file`` and is
     filtered by the ``select_models`` and ``select_brush`` selections; all
     three names come from the enclosing scope.

     :param dimension: field name used (as nominal) on the x axis.
     :param width_step: per-category step width of the chart.
     :return: the interactive chart.
     """
     top_axis = alt.Axis(labelAngle=0,
                         title=None,
                         orient='top',
                         labelPadding=5)

     dots = alt.Chart(os.path.basename(props_data_file))
     dots = dots.mark_circle(size=50, color='green')
     dots = dots.transform_calculate(rank="format(datum.rank,'03')")
     dots = dots.transform_filter(select_models)
     dots = dots.transform_filter(select_brush)
     dots = dots.encode(
         x=alt.X(f'{dimension}:N', axis=top_axis),
         y=alt.Y('prop:N', axis=alt.Axis(title=None)),
         tooltip=['prop:N'],
     )
     dots = dots.properties(
         width=alt.Step(width_step),
         title=f'specification terms (x-{dimension})',
     )
     return dots.interactive()
Ejemplo n.º 8
0
 def make_violation_chart(dimension, width_step):
     """Build the red-circle chart of ``violation`` values against ``dimension``.

     The data is referenced by the base name of ``violations_data_file`` and
     is filtered by the ``select_models`` and ``select_brush`` selections; all
     three names come from the enclosing scope. Circle size encodes ``num``
     and opacity encodes ``weight``.

     :param dimension: field name used (as nominal) on the x axis.
     :param width_step: per-category step width of the chart.
     :return: the interactive chart.
     """
     top_axis = alt.Axis(labelAngle=0,
                         title=None,
                         orient='top',
                         labelPadding=5)
     weight_opacity = alt.Opacity('weight:Q',
                                  scale=alt.Scale(range=[0, 1]),
                                  legend=None)

     dots = alt.Chart(os.path.basename(violations_data_file))
     dots = dots.mark_circle(color='red')
     dots = dots.transform_calculate(rank="format(datum.rank,'03')")
     dots = dots.transform_filter(select_models)
     dots = dots.transform_filter(select_brush)
     dots = dots.encode(
         x=alt.X(f'{dimension}:N', axis=top_axis),
         y=alt.Y('violation:N', axis=alt.Axis(title=None)),
         size=alt.Size('num:Q', legend=None),
         opacity=weight_opacity,
         tooltip=[
             'set:N', 'rank:Q', 'violation:N', 'num:Q', 'weight:Q',
             'cost_contrib:Q'
         ],
     )
     dots = dots.properties(
         width=alt.Step(width_step),
         title=f'soft rule violations (x-{dimension})',
     )
     return dots.interactive()
Ejemplo n.º 9
0
# Reference line: a horizontal rule at the mean of temp_max.
rule = alt.Chart(source).mark_rule(color='blue').encode(y='mean(temp_max):Q')

# Additional line layer for temp_min (original note: "new axis").
line = base.mark_line(color='orange').encode(y='temp_min:Q')

# Add tick marks for temp_avg.
tick = base.mark_tick(
    color='red',
    thickness=2,
    size=60 * 0.9,  # controls width of tick.
).encode(y='temp_avg:Q', )

# The actual chart: all layers combined (`bar`, `text` and `base` are defined
# outside this excerpt), then saved as HTML.
chart = (bar + text + rule + line + tick).properties(
    width=alt.Step(60),
    height=400,
)
chart.save('Simple Bar Chart.html')
"""
퍼센트 막대 그래프
"""
# The string above is a section marker meaning "percentage bar chart".
source = pd.DataFrame({
    'Activity': ['Sleeping', 'Eating', 'TV', 'Work', 'Exercise'],
    'Time': [8, 2, 4, 8, 2]
})

# Base chart: add the total of Time to every row, derive each row's share of
# that total, and put the share on the x axis formatted as a percentage.
base = alt.Chart(source).transform_joinaggregate(
    TotalTime='sum(Time)', ).transform_calculate(
        PercentOfTotal="datum.Time / datum.TotalTime").encode(
            x=alt.X('PercentOfTotal:Q', axis=alt.Axis(format='.0%')))
Ejemplo n.º 10
0
# -- adjust
import altair as alt
import pandas as pd

# Two-row sample frame shared by the bar-size examples below.
data = pd.DataFrame({'name': ['a', 'b'], 'value': [4, 10]})

# Bars drawn with mark size 10.
alt.Chart(data).mark_bar(size=10).encode(x='name:O', y='value:Q')

# Same chart with mark size 30.
alt.Chart(data).mark_bar(size=30).encode(x='name:O', y='value:Q')

# Mark size 30 with an explicit overall chart width of 200.
alt.Chart(data).mark_bar(size=30).encode(x='name:O',
                                         y='value:Q').properties(width=200)

# Mark size 30 with the width given as a step of 100 per category
# (x is nominal here rather than ordinal).
alt.Chart(data).mark_bar(size=30).encode(
    x='name:N', y='value:Q').properties(width=alt.Step(100))

# adjust chart size

import altair as alt
from vega_datasets import data

# Load the cars dataset from vega_datasets.
cars = data.cars()

# Count of rows per Origin, with an explicit chart width and height.
alt.Chart(cars).mark_bar().encode(x='Origin',
                                  y='count()').properties(width=200,
                                                          height=150)

alt.Chart(cars).mark_bar().encode(x='Origin',
                                  y='count()',
                                  column='Cylinders:Q').properties(width=100,
Ejemplo n.º 11
0
"""
Jitter Chart
------------
In this chart, we encode the ``Cylinders`` column from the ``cars`` dataset in the ``y``-channel.  Because most cars (all but seven) in this dataset have 4, 6, or 8 cylinders, the default presentation of this data would show most of the data concentrated on three horizontal lines.  Furthermore, in that default presentation, it would be difficult to gauge the relative frequencies with which different values occur (because there would be so much overlap).  To compensate for this, we use the ``yOffset`` channel to incorporate a random offset (jittering).  This is adapted from a corresponding Vega-Lite Example:
`Dot Plot with Jittering <https://vega.github.io/vega-lite/examples/point_offset_random.html>`_.
"""
# category: scatter plots
import altair as alt
from vega_datasets import data

# Load the cars dataset from vega_datasets.
source = data.cars()

# Horsepower against Cylinders (ordinal); each point is shifted within its
# Cylinders band by a random value computed per row in `randomCalc`.
alt.Chart(source).mark_point().encode(
    x='Horsepower:Q',
    y='Cylinders:O',
    yOffset='randomCalc:Q'
).transform_calculate(
    randomCalc='random()'
).properties(
    height=alt.Step(50)  # band height per Cylinders value
)
Ejemplo n.º 12
0
# Tally how often each character occurs in the sequence.
X = Counter(sequence)
X_list = list(X)
X_values = list(X.values())
X

# Display the data in raw text (Optional to do so)
st.subheader("2. Print text")
# Display label for each nucleotide letter. The labels for 'C' and 'G' were
# previously mislabelled as thymine/adenine.
full_forms = {
    'A': 'adenine (A)',
    'C': 'cytosine (C)',
    'T': 'thymine (T)',
    'G': 'guanine (G)',
}

for ch in 'ATGC':
    st.write(f"There are {X[ch]} {full_forms[ch]}")

# Display the data in a dataframe which is better in readability
st.subheader("3. Display DataFrame")
df = pd.DataFrame.from_dict(X, orient='index')
df.rename({0: 'count'}, axis='columns', inplace=True)
df.reset_index(inplace=True)  # the original index becomes a column now
df.rename(columns={'index': 'Nucleotide'}, inplace=True)
st.write(df)

# Graph using Altair
st.subheader("4. Display Bar Chart")
p = alt.Chart(df).mark_bar().encode(x='Nucleotide', y='count')
p = p.properties(width=alt.Step(80))  # 80 otherwise bars would be thin
st.write(p)
Ejemplo n.º 13
0
# Collect the keys of every entry's "方" mapping into `yao_list`
# (`yao_list` and `shdata` are defined outside this excerpt).
for fang in shdata.values():
    yao_list.extend(fang["方"].keys())

# Count occurrences and tabulate them, most frequent first.
c = collections.Counter(yao_list)
data = pd.DataFrame({"fname": c.keys(), "fcount": c.values()})
data.sort_values('fcount', ascending=False, inplace=True)

# Draw a bar chart with gradient colouring.
st.write(
    alt.Chart(data).mark_bar(size=30).encode(
        x=alt.X('fname', sort=None),
        y='fcount',
        color=alt.Color("fcount",
                        scale=alt.Scale(domain=(100, 1),
                                        scheme="redyellowgreen"))).properties(
                                            width=alt.Step(40), height=600))

st.write(data)

st.title('伤寒论方剂使用次数排名')
# Same counting, this time over each entry's '名' value.
fang_list = [val['名'] for val in shdata.values()]
c = collections.Counter(fang_list)
data = pd.DataFrame({"fname": c.keys(), "fcount": c.values()})
data.sort_values('fcount', ascending=False, inplace=True)

# `huabar` is called only after `data` has been rebuilt above.
huabar('fname', 'fcount')

#st.write(alt.Chart(data).mark_bar().encode(x=alt.X('fname', sort=None), y='fcount'))

st.write(data)
Ejemplo n.º 14
0
def deposits_chart(df):
    """Render a bar chart of summed ``change`` per ``description`` in Streamlit.

    The chart is split into one column per year of ``date``. Nothing is
    returned.

    :param df: data passed to ``alt.Chart``; the encodings reference its
        ``date``, ``change`` and ``description`` fields.
    """
    chart = alt.Chart(df).mark_bar().encode(
        alt.Column('year(date):T', title='Year'),
        # Axis title spelling corrected from "Ammount".
        alt.Y('sum(change):Q', title='Amount'),
        alt.X('description:O', title=None)).properties(width=alt.Step(50))
    st.altair_chart(chart)
Ejemplo n.º 15
0
# Keys and values of the count mapping as separate lists.
X_label = list(X)
X_values = list(X.values())
X

# --------------------
# 2. Text output: one line per nucleotide.
# --------------------
st.subheader("2. Print text")
for _base, _suffix in (
    ("A", " adenine (A)"),
    ("T", " Thymine (T)"),
    ("G", " guanine (G)"),
    ("C", " cytosine (C)"),
):
    st.write("There are " + str(X[_base]) + _suffix)

# --------------------
# 3. Table output: one row per nucleotide with its count.
# --------------------
st.subheader("3. Display DataFrame")
df = (
    pd.DataFrame.from_dict(X, orient="index")
    .rename(columns={0: "count"})
    .reset_index()
    .rename(columns={"index": "nucleotide"})
)
st.write(df)

# --------------------
# 4. Bar chart output via Altair.
# --------------------
st.subheader("4. Display Bar chart")
p = (
    alt.Chart(df)
    .mark_bar()
    .encode(x="nucleotide", y="count")
    .properties(width=alt.Step(60))  # step width per bar
)
st.write(p)
Ejemplo n.º 16
0
                           ('G', seq.count('G')), ('C', seq.count('C'))])
    return dna_count_dict


# Count the nucleotides, then keep keys and values as separate lists.
X = DNA_nucleotide_count(sequence)

X_label = list(X)
X_values = list(X.values())

# Text output: one line per nucleotide.
st.subheader("1. Print text")
for _base, _suffix in (
    ('A', " adenine (A)"),
    ('T', " thymine (T)"),
    ('G', " guanine (G)"),
    ('C', " cytosine (C)"),
):
    st.write("There are " + str(X[_base]) + _suffix)

# Table output: one row per nucleotide with its count.
st.subheader("3. Display DataFrame")
df = (
    pd.DataFrame.from_dict(X, orient='index')
    .rename(columns={0: 'count'})
    .reset_index()
    .rename(columns={'index': 'nucleotide'})
)
st.write(df)

# Bar chart output with a step width of 80 per bar.
st.subheader("4. Display Bar Chart")
bar_chart = (
    alt.Chart(df)
    .mark_bar()
    .encode(x='nucleotide', y='count')
    .properties(width=alt.Step(80))
)

st.write(bar_chart)
Ejemplo n.º 17
0
def create_exploratory_visualisation(trial_id,
                                     directory,
                                     vis_data_file,
                                     match_data_file,
                                     violations_data_file,
                                     props_data_file,
                                     baseline_label='baseline',
                                     verde_label='verde'):
    """
    Uses altair to generate the exploratory visualisation and saves it as
    ``<directory>/vegalite/<trial_id>_view_compare.html``.

    Every data file is handed to ``alt.Chart`` by its base name only.

    :param trial_id: identifier used as the prefix of the single-vis viewer
        link, in the overall chart title and in the output file name.
    :param directory: base directory; the chart is saved in its ``vegalite``
        sub-directory.
    :param vis_data_file: path of the data file behind the ranking layers
        (squares, link circles and cost rules).
    :param match_data_file: path of the data file behind the match lines; a
        falsy value leaves the match-line layer out.
    :param violations_data_file: path of the data file behind the two soft
        rule violation charts.
    :param props_data_file: path of the data file behind the two
        specification term charts.
    :param baseline_label: value of the ``set`` field whose link circles are
        drawn with an x offset of -15.
    :param verde_label: value of the ``set`` field whose link circles are
        drawn with an x offset of 15.
    :return: None; the chart is written to disk.
    """

    # prefix of the hyperlink attached to each vis model (see `link` below)
    vl_viewer = f'{trial_id}_view_one_vl.html?vl_json='

    # common data and transforms for first layer with marks for each vis model
    base = alt.Chart(os.path.basename(vis_data_file)).transform_calculate(
        rank="format(datum.rank,'03')",
        link=f"'{vl_viewer}' + datum.vl_spec_file").properties(
            width=250, height=alt.Step(30), title='visualisation rankings')

    # add a selectable square for each vis model
    # (both selections are reused by every layer and chart below)
    select_models = alt.selection_multi(fields=['set', 'rank'])
    select_brush = alt.selection_interval()
    squares = base.mark_square(size=150).encode(
        alt.X('set:O',
              axis=alt.Axis(labelAngle=0,
                            title=None,
                            orient='top',
                            labelPadding=5)),
        alt.Y('rank:O', axis=alt.Axis(title=None)),
        tooltip=['set:N', 'rank:N', 'cost:Q'],
        opacity=alt.Opacity('has_match:O', legend=None),
        color=alt.condition(
            select_models | select_brush, alt.value('steelblue'),
            alt.value('lightgray'))).add_selection(select_models,
                                                   select_brush).interactive()

    # add a small circle with the hyperlink to the actual vis.
    # Shame that xoffset is not an encoding channel, so we have to do in two steps...
    def make_circles(vis_set, offset):
        return base.transform_filter(datum.set == vis_set).mark_circle(
            size=25,
            xOffset=offset,
        ).encode(alt.X('set:O',
                       axis=alt.Axis(labelAngle=0,
                                     title=None,
                                     orient='top',
                                     labelPadding=5)),
                 alt.Y('rank:O'),
                 tooltip=['link:N'],
                 href='link:N',
                 color=alt.condition(select_models | select_brush,
                                     alt.value('steelblue'),
                                     alt.value('lightgray'))).interactive()

    baseline_circles = make_circles(baseline_label, -15)
    verde_circles = make_circles(verde_label, 15)

    # next layer is match lines, handle case of no matches
    if match_data_file:
        # colour per value of the `crossed` field
        col_domain = ['not', 'with_equal_cost', 'with_different_cost']
        col_range_ = ['steelblue', 'green', 'red']
        match_lines = alt.Chart(
            os.path.basename(match_data_file)).mark_line().transform_calculate(
                rank="format(datum.rank,'03')").encode(
                    alt.X('set:O',
                          axis=alt.Axis(labelAngle=0,
                                        title=None,
                                        orient='top',
                                        labelPadding=5)),
                    alt.Y('rank:O'),
                    detail=['match:N', 'match_type:N'],
                    strokeDash=alt.StrokeDash(
                        'match_type:N',
                        scale=alt.Scale(domain=['verde_addition', 'exact'],
                                        range=[[5, 4], [1, 0]]),
                        legend=alt.Legend(orient='bottom')),
                    color=alt.condition(
                        select_models | select_brush,
                        alt.Color('crossed:N',
                                  scale=alt.Scale(domain=col_domain,
                                                  range=col_range_),
                                  legend=alt.Legend(orient='bottom')),
                        alt.value('lightgray')))
    else:
        match_lines = None

    # rules to connect models with the same cost
    cost_rules = base.mark_rule(strokeWidth=2).transform_aggregate(
        min_rank='min(rank)', max_rank='max(rank)',
        groupby=['set', 'cost'
                 ]).encode(alt.X('set:O',
                                 axis=alt.Axis(labelAngle=0,
                                               title=None,
                                               orient='top',
                                               labelPadding=5)),
                           alt.Y('min_rank:O'),
                           alt.Y2('max_rank:O'),
                           color=alt.condition(select_models | select_brush,
                                               alt.value('steelblue'),
                                               alt.value('lightgray')),
                           tooltip=['cost:Q', 'min_rank:O',
                                    'max_rank:O']).interactive()

    # layer order: link circles, optional match lines, cost rules, squares
    rank_chart = baseline_circles + verde_circles

    if match_lines:
        rank_chart = rank_chart + match_lines

    rank_chart = rank_chart + cost_rules + squares

    # chart to show violation occurrences and weights for selected vis models across sets
    def make_violation_chart(dimension, width_step):
        return alt.Chart(os.path.basename(violations_data_file)).mark_circle(
            color='red', ).transform_calculate(
                rank="format(datum.rank,'03')", ).transform_filter(
                    select_models).transform_filter(select_brush).encode(
                        x=alt.X(f'{dimension}:N',
                                axis=alt.Axis(labelAngle=0,
                                              title=None,
                                              orient='top',
                                              labelPadding=5)),
                        y=alt.Y('violation:N', axis=alt.Axis(title=None)),
                        size=alt.Size('num:Q', legend=None),
                        opacity=alt.Opacity('weight:Q',
                                            scale=alt.Scale(range=[0, 1]),
                                            legend=None),
                        tooltip=[
                            'set:N', 'rank:Q', 'violation:N', 'num:Q',
                            'weight:Q', 'cost_contrib:Q'
                        ]).properties(
                            width=alt.Step(width_step),
                            title=f'soft rule violations (x-{dimension})'
                        ).interactive()

    violation_set_chart = make_violation_chart('set', 40)
    violation_rank_chart = make_violation_chart('rank', 30)

    # chart to show prop occurrences for selected vis models across sets
    def make_prop_chart(dimension, width_step):
        return alt.Chart(os.path.basename(props_data_file)).mark_circle(
            size=50, color='green').transform_calculate(
                rank="format(datum.rank,'03')").transform_filter(
                    select_models).transform_filter(select_brush).encode(
                        x=alt.X(f'{dimension}:N',
                                axis=alt.Axis(labelAngle=0,
                                              title=None,
                                              orient='top',
                                              labelPadding=5)),
                        y=alt.Y('prop:N', axis=alt.Axis(title=None)),
                        tooltip=['prop:N']).properties(
                            width=alt.Step(width_step),
                            title=f'specification terms (x-{dimension})'
                        ).interactive()

    prop_set_chart = make_prop_chart('set', 40)
    prop_rank_chart = make_prop_chart('rank', 30)

    # glue them all together: `|` places charts side by side, `&` stacks them
    top_chart = rank_chart | violation_set_chart | prop_set_chart
    bottom_chart = violation_rank_chart | prop_rank_chart
    chart = top_chart & bottom_chart
    # put a timestamp
    ts = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    chart = chart.properties(title=f'{trial_id} {ts}')

    file_name = os.path.join(directory, 'vegalite',
                             f'{trial_id}_view_compare.html')
    logging.info(f'writing comparison visualisation to {file_name}')
    chart.save(file_name)
Ejemplo n.º 18
0
    return d


# Count the nucleotides, then keep keys and values as separate lists.
X = DNA_nucleotide_count(sequence)

X_label = list(X)
X_values = list(X.values())

X

## 2. Text output: one line per nucleotide
st.subheader('2. Print text')
for _base, _suffix in (
    ('A', ' adenine (A)'),
    ('T', ' thymine (T)'),
    ('G', ' guanine (G)'),
    ('C', ' cytosine (C)'),
):
    st.write('There are ' + str(X[_base]) + _suffix)

### 3. Table output: one row per nucleotide with its count
st.subheader('3. Display DataFrame')
df = (
    pd.DataFrame.from_dict(X, orient='index')
    .rename(columns={0: 'count'})
    .reset_index()
    .rename(columns={'index': 'nucleotide'})
)
st.write(df)

### 4. Bar chart output using Altair
st.subheader('4. Display Bar Chart')
p = (
    alt.Chart(df)
    .mark_bar()
    .encode(x='nucleotide', y='count')
    .properties(width=alt.Step(80))  # step width per bar
)
st.write(p)
Ejemplo n.º 19
0
    'Nucleotide Base': ['A', 'T', 'G', 'C'],
    'Count': [
        DNA_Sequence.count('A'),
        DNA_Sequence.count('T'),
        DNA_Sequence.count('G'),
        DNA_Sequence.count('C')
    ]
}
# Tabulate the per-base counts with a fixed column order.
df = pd.DataFrame(data, columns=['Nucleotide Base', 'Count'])

# Graphical representation of the base count.
st.subheader("[4] Graphical Representation of the Nucleotide Base count")
Graph = alt.Chart(df).mark_bar().encode(x='Nucleotide Base', y='Count')

# Set the step width allotted to each bar.
Graph = Graph.properties(width=alt.Step(75))

# Display the graph.
st.write(Graph)

# Markdown horizontal rule as a section separator.
st.write("""
***
""")

# GC content of the given DNA sequence: count the two bases involved.
C_count = DNA_Sequence.count('C')  # number of cytosine characters
G_count = DNA_Sequence.count('G')  # number of guanine characters

try:
    GC_content = (C_count + G_count) / length_DNA * 100
except:
Ejemplo n.º 20
0
    },
    {
        "a": "a3",
        "b": "b3",
        "c": "x",
        "p": "0.99"
    },
    {
        "a": "a3",
        "b": "b3",
        "c": "y",
        "p": "0.80"
    },
    {
        "a": "a3",
        "b": "b3",
        "c": "z",
        "p": "0.37"
    },
])

# Small-multiples bar chart: one facet per combination of `a` (rows) and `b`
# (columns); inside each facet, one bar per `c` value with length `p`,
# formatted as a percentage. Each facet is 60 wide with a step of 8 per bar.
alt.Chart(source, width=60, height=alt.Step(8)).mark_bar().encode(
    y=alt.Y("c:N", axis=None),  # no y axis; `c` is identified by colour
    x=alt.X("p:Q", title=None, axis=alt.Axis(format="%")),
    color=alt.Color("c:N",
                    title="settings",
                    legend=alt.Legend(orient="bottom", titleOrient="left")),
    row=alt.Row("a:N", title="Factor A", header=alt.Header(labelAngle=0)),
    column=alt.Column("b:N", title="Factor B"),
)
Ejemplo n.º 21
0
# Load the barley dataset from vega_datasets.
source = data.barley()

# Point chart of yield per variety, coloured by year, with one row facet per
# site. Varieties are sorted by descending x; sites by descending summed yield.
alt.Chart(source, title="The Morris Mistake").mark_point().encode(
    alt.X('yield:Q',
          title="Barley Yield (bushels/acre)",
          scale=alt.Scale(zero=False),  # do not force the x scale to start at 0
          axis=alt.Axis(grid=False)),
    alt.Y('variety:N', title="", sort='-x', axis=alt.Axis(grid=True)),
    color=alt.Color('year:N', legend=alt.Legend(title="Year")),
    row=alt.Row(
        'site:N',
        title="",
        sort=alt.EncodingSortField(field='yield', op='sum',
                                   order='descending'),
    )).properties(height=alt.Step(20)).configure_view(stroke="transparent")

###
import altair as alt
from vega_datasets import data

# Since these data are each more than 5,000 rows we'll import from the URLs
# (only the URL strings are kept here, not loaded frames).
airports = data.airports.url
flights_airport = data.flights_airport.url

# US state outlines: the "states" feature of the us_10m TopoJSON file.
states = alt.topo_feature(data.us_10m.url, feature="states")

# Create mouseover selection
select_city = alt.selection_single(on="mouseover",
                                   nearest=True,
                                   fields=["origin"],
Ejemplo n.º 22
0
def app():
    """Render the DNA nucleotide counting page with Streamlit.

    Shows a title and banner image, reads a sequence from a text area (the
    first line of the input is dropped), counts A/T/G/C and presents the
    counts as a dictionary, as text, as a DataFrame and as an Altair bar
    chart in four columns.
    """
    # ---- Page title ----
    st.write("""
    # DNA Counting-App

    counting the nucleotide composition of DNA 
    
    * **Python libraries:** streamlit, altair, PIL
    """)

    # ---- Banner image, fetched over HTTP ----
    url = 'https://image.shutterstock.com/image-photo/blue-helix-human-dna-structure-600w-1669326868.jpg'
    response = requests.get(url, stream=True)
    image = Image.open(response.raw)

    st.image(image, use_column_width=True)

    # ---- Input text box ----
    st.header("Enter DNA sequence")
    sequence_input = ">DNA Query 2\nGAACACGTGGAGGCAAACAGGAAGGTGAAGAAGAACTTATCCTATCAGGACGGAAGGTCCTGTGCTCGGG\nATCTTCCAGACGTCGCGACTCTAAATTGCCCCCTCTGAGGTCAAGGAACACAAGATGGTTTTGGAAATGC\nTGAACCCGATACATTATAACATCACCAGCATCGTGCCTGAAGCCATGCCTGCTGCCACCATGCCAGTCCT"
    entered = st.text_area("Sequence input", sequence_input, height=200)
    # Skip the first line, then join the remaining lines into one string.
    sequence = ''.join(entered.splitlines()[1:])

    # Occurrences of each base, keyed in A, T, G, C order.
    X = {base: sequence.count(base) for base in ('A', 'T', 'G', 'C')}

    # ---- Results, shown four ways ----
    c1, c2, c3, c4 = st.beta_columns([1, 1, 2, 2])

    c1.subheader('1. Print Dictionary')
    c1.write(X)

    c2.subheader('2. Print text')
    for base, suffix in (
        ('A', ' adenine (A)'),
        ('T', ' thymine (T)'),
        ('G', ' guanine (G)'),
        ('C', ' cytosine (C)'),
    ):
        c2.write('There are  ' + str(X[base]) + suffix)

    c3.subheader('3. Dataframe')
    df = (
        pd.DataFrame.from_dict(X, orient='index')
        .rename(columns={0: 'count'})
        .reset_index()
        .rename(columns={'index': 'nucleotide'})
    )
    c3.write(df)

    c4.subheader('4. Display Bar graph w. Altair')
    p = (
        alt.Chart(df)
        .mark_bar()
        .encode(x='nucleotide', y='count')
        .properties(width=alt.Step(60))
    )
    c4.write(p)
Ejemplo n.º 23
0

def DNA_nucleotide_count(seq):
    """Return the number of 'A', 'T', 'G' and 'C' occurrences in ``seq``.

    The result is a dict keyed in that order; a base that does not occur
    maps to 0.
    """
    return {base: seq.count(base) for base in ('A', 'T', 'G', 'C')}


# Count the nucleotides and show the resulting mapping.
x = DNA_nucleotide_count(sequence)
x

# Text output: one line per nucleotide
st.subheader('2. Print text')
for _base, _label in (
    ('A', 'adenine (A)'),
    ('T', 'thymine (T)'),
    ('G', 'guanine (G)'),
    ('C', 'cytosine (C)'),
):
    st.write('There are', str(x[_base]), _label)

# Table output: one row per nucleotide with its count
st.subheader("3. Display DataFrame")
df = (
    pd.DataFrame.from_dict(x, orient='index')
    .rename(columns={0: 'count'})
    .reset_index()
    .rename(columns={'index': 'Nucleotide'})
)
st.write(df)

# Bar chart output with a step width of 80 per bar
st.subheader('4. Display chart using Altair')
p = (
    alt.Chart(df)
    .mark_bar()
    .encode(x='Nucleotide', y='count')
    .properties(width=alt.Step(80))
)
st.write(p)
Ejemplo n.º 24
0
# Section heading for the dictionary output.
st.subheader('1 Print dictionary')


def DNA_nucleotide_count(seq):
    """Count every distinct character of ``seq``.

    When ``seq`` contains a newline, its lines are joined first. The result
    is a dict whose keys are the distinct characters in sorted order.
    """
    if "\n" in seq:
        seq = "".join(seq.splitlines())
    return {symbol: seq.count(symbol) for symbol in sorted(set(seq))}


# Count the nucleotides and show the resulting mapping.
X = DNA_nucleotide_count(sequence)
X

### 2. Print text
st.subheader('2. Print text')
# NOTE(review): DNA_NUCLEOTIDES is defined outside this excerpt; the lookup
# presumably fails for any counted character it does not contain - confirm.
for dna_nucleotide, count in X.items():
    st.write(f"There're {count} {DNA_NUCLEOTIDES[dna_nucleotide]}")

### 3. Display DataFrame
st.subheader('3. Display DataFrame')
df = pd.DataFrame.from_dict(X, orient='index')
df = df.rename({0: 'counts'}, axis='columns')
df.reset_index(inplace=True)  # the original index becomes a column
df = df.rename(columns={'index': 'nucleotides'})
st.write(df)

### 4. Display Bar Chart using Altair
# Heading spelling corrected from "Bar Char".
st.subheader('4. Display Bar Chart')
p = alt.Chart(df).mark_bar().encode(
    x="nucleotides",
    y="counts").properties(width=alt.Step(80)  # controls width of bar.
                           )
st.write(p)
#X_label = list(X)
#X_values = list(X.values())

X

### 2. Text output: one line per nucleotide
st.subheader('2. Print text')
for _base, _suffix in (
    ('A', ' adenine (A)'),
    ('T', ' thymine (T)'),
    ('G', ' guanine (G)'),
    ('C', ' cytosine (C)'),
):
    st.write('There are  ' + str(X[_base]) + _suffix)

### 3. Table output: one row per nucleotide with its count
st.subheader('3. Display DataFrame')
df = (
    pd.DataFrame.from_dict(X, orient='index')
    .rename(columns={0: 'count'})
    .reset_index()
    .rename(columns={'index': 'nucleotide'})
)
st.write(df)

### 4. Bar chart output using Altair
st.subheader('4. Display Bar chart')
p = (
    alt.Chart(df)
    .mark_bar()
    .encode(x='nucleotide', y='count')
    .properties(width=alt.Step(80))  # step width per bar
)
st.write(p)
Ejemplo n.º 26
0
# Quick look at the data: first rows and the distinct categories.
df.head()
df.category.unique()

# Per category: mean price and number of distinct item ids.
d1 = df.groupby(['category']).agg({'price': 'mean', 'item_id': 'nunique'})
d1 = pd.DataFrame(d1)  # Wrap the aggregation result in a DataFrame
d1.reset_index(level=0, inplace=True)  # Convert Index to Column
#d1.price=d1.price.apply(np.round)
d1.price = d1.price.astype(int)  # cast the mean price to int
#d1=d1.sort_values(by='item_id')
# Display names for the three columns, in their current order.
d1.columns = ['Furniture Category', 'Average Price (SR)', 'Number of Items']

##

# Bars: number of items per furniture category.
bar = alt.Chart(d1).mark_bar(
).encode(x='Furniture Category', y='Number of Items').properties(
    width=alt.Step(40),  # controls width of bar.
    title=
    '#Items vs Price Per Category (Bars represent #Items, Ticks represent Avg. Price)'
)

# Text labels showing the item count, anchored at the top of each bar.
text_bar = bar.mark_text(
    align='center',
    baseline='bottom',
    dx=3  # Nudges text to right so it doesn't appear on top of the bar
).encode(text='Number of Items')

tick = alt.Chart(d1).mark_tick(
    color='red',
    thickness=4,
    size=40 * 0.9,  # controls width of tick.
).encode(x='Furniture Category',
Ejemplo n.º 27
0

# Count the nucleotides.
X = DNA_nucleotide_count(sequence)

#X_label = list(X)
#X_values = list(X.values())

X

### 2. Text output: one line per nucleotide
st.subheader('2. Print text')
for _base, _suffix in (
    ('A', ' adenine (A)'),
    ('T', ' thymine (T)'),
    ('G', ' guanine (G)'),
    ('C', ' cytosine (C)'),
):
    st.write('There are  ' + str(X[_base]) + _suffix)

### 3. Table output: one row per nucleotide with its count
st.subheader('3. Display DataFrame')
df = (
    pd.DataFrame.from_dict(X, orient='index')
    .rename(columns={0: 'count'})
    .reset_index()
    .rename(columns={'index': 'nucleotide'})
)
st.write(df)

### 4. Bar chart output using Altair
st.subheader('4. Display Bar chart')
p = (
    alt.Chart(df)
    .mark_bar()
    .encode(x='nucleotide', y='count')
    .properties(width=alt.Step(80))  # step width per bar
)
st.write(p)
Ejemplo n.º 28
0
# Print one sentence per nucleotide. Each sentence must read the count stored
# under the key of the nucleotide it names (previously the guanine line
# printed X['C'] and the cytosine line printed X['T']).
st.subheader('2. Print Text')
st.write("There are " + str(X['A']) + " adenine (A)")
st.write("There are " + str(X['T']) + " thymine (T)")
st.write("There are " + str(X['G']) + " guanine (G)")
st.write("There are " + str(X['C']) + " cytosine(C)")

# Display Dataframe
st.subheader('3. Display DataFrame')
# Keys of X become the rows; the single value column (0) is renamed to
# 'count' and the index is exposed as the 'nucleotide' column.
df = pd.DataFrame.from_dict(X, orient='index')
df = df.rename({0: 'count'}, axis='columns')
df.reset_index(inplace=True)
df = df.rename(columns={'index': 'nucleotide'})
st.write(df)

# Display Bar Chart Using Altair
st.subheader('4. Display Bar Chart')
p = alt.Chart(df).mark_bar().encode(
    x='nucleotide',
    y='count',
)

p = p.properties(
    width=alt.Step(80),  # controls the width of the bar
)
st.write(p)





Ejemplo n.º 29
0
# Bare expression: when run through Streamlit, its "magic" feature renders a
# value standing alone on a line, so this shows the raw count mapping.
X

# Print text
st.subheader('2. Print text')
# Sentence endings keyed by nucleotide; dict order gives A, T, G, C.
sentence_endings = {
    'A': ' adenine (A)',
    'T': ' thymine (T)',
    'G': ' guanine (G)',
    'C': ' cytosine (C)',
}
for nucleotide_key, sentence_end in sentence_endings.items():
    st.write('There are  ' + str(X[nucleotide_key]) + sentence_end)

# Display DataFrame
st.subheader('3. Display DataFrame')
# Rows come from the keys of X; column 0 is renamed to 'count' and the index
# is turned into a 'nucleotide' column.
df = (
    pd.DataFrame.from_dict(X, orient='index')
    .rename({0: 'count'}, axis='columns')
    .reset_index()
    .rename(columns={'index': 'nucleotide'})
)
st.write(df)

# Display BarChart
st.subheader('4. Display BarChart')
bar_width = alt.Step(80)  # controls width of bar
p = alt.Chart(df).mark_bar().encode(x='nucleotide', y='count')
p = p.properties(width=bar_width)

st.write(p)
Ejemplo n.º 30
0
def main(in_file_1, out_dir):
    """
    Compare several classifiers on the training data, save score plots and
    persist a tuned MLP pipeline.

    Steps, in order:

    1. Read the training CSV; every column except ``quality`` and
       ``quality_rank`` is a feature, ``quality_rank`` is the target.
    2. Build a shared preprocessor: impute + scale the numeric columns and
       one-hot encode the binary ``type`` column.
    3. Score six classifiers with ``mean_std_cross_val_scores`` and chart
       their ``test_f1_micro`` values.
    4. Run 20-fold cross-validation for the random forest and MLP pipelines
       and chart the distribution of their test scores.
    5. Hand all charts to ``save_plots``.
    6. Run a randomized hyper-parameter search over an MLP pipeline and dump
       the best estimator to ``out_dir + "best_Model.pkl"``.

    Parameters
    ----------
    in_file_1 :
        Location of the training CSV, passed straight to ``pd.read_csv``.
    out_dir :
        Output location. It is passed to ``save_plots`` and used as a plain
        string prefix for the model file, so it should normally end with a
        path separator.
    """
    # Read the training data and separate features from the target.
    train_df = pd.read_csv(in_file_1)
    X_train = train_df.drop(columns=['quality', 'quality_rank'])
    y_train = train_df['quality_rank']

    # -------------------------------------------------------------------
    # Preprocessor
    numeric_features = [
        'fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
        'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
        'pH', 'sulphates', 'alcohol'
    ]
    binary_features = ['type']

    numeric_transformer = make_pipeline(SimpleImputer(), StandardScaler())
    binary_transformer = make_pipeline(
        OneHotEncoder(drop="if_binary", dtype=int))

    preprocessor = ColumnTransformer(transformers=[
        ('num', numeric_transformer, numeric_features),
        ('bin', binary_transformer, binary_features),
    ])

    # -------------------------------------------------------------------
    # Model selection
    results = {}
    # A list rather than a set: list/tuple/dict are the documented container
    # types for multi-metric ``scoring``. The result keys stay
    # 'test_f1_micro' / 'train_f1_micro'.
    scoring_metric = ['f1_micro']
    classifiers_plot = {
        "RidgeClassifier":
        RidgeClassifier(random_state=123),
        "Random Forest":
        RandomForestClassifier(bootstrap=False,
                               max_depth=20,
                               max_features='sqrt',
                               n_estimators=1800,
                               random_state=123),
        "KNN":
        KNeighborsClassifier(n_neighbors=5),
        "MLP Classifier":
        MLPClassifier(alpha=0.05,
                      hidden_layer_sizes=(50, 100, 50),
                      learning_rate='adaptive',
                      max_iter=1000,
                      random_state=123),
        "Nearest Centroid":
        NearestCentroid(),
        "QDA":
        QuadraticDiscriminantAnalysis()
    }

    for (name, model) in classifiers_plot.items():
        pipe_iter = make_pipeline(preprocessor, model)
        results[name] = mean_std_cross_val_scores(pipe_iter,
                                                  X_train,
                                                  y_train,
                                                  return_train_score=True,
                                                  scoring=scoring_metric)

    # -------------------------------------------------------------------
    # Plotting result
    # All classifiers: one row per classifier after the transpose; the
    # classifier name ends up in the 'index' column after reset_index().
    plots_dict = {}
    plot_results = pd.DataFrame(results).T
    plot_results = plot_results.reset_index()
    bar_all = alt.Chart(plot_results).mark_bar().encode(
        alt.X('test_f1_micro', axis=alt.Axis(title='F1 Micro score')),
        alt.Y('index', sort='-x', axis=alt.Axis(title='Classifier')),
    ).properties(width=alt.Step(40)  # controls width of bar.
                 )

    plots_dict['f1_score_all_classifiers.svg'] = bar_all

    # Stability across cv folds: histogram of the 20 per-fold test scores.
    pipe_rf = make_pipeline(
        preprocessor,
        RandomForestClassifier(bootstrap=False,
                               max_depth=20,
                               max_features='sqrt',
                               n_estimators=1800,
                               random_state=123))
    scores_rf = cross_validate(pipe_rf,
                               X_train,
                               y_train,
                               return_train_score=True,
                               scoring=scoring_metric,
                               n_jobs=-1,
                               cv=20)

    plot_rf = pd.DataFrame(scores_rf)
    bar_rf = alt.Chart(plot_rf).mark_bar().encode(
        x=alt.X('test_f1_micro',
                axis=alt.Axis(title='F1 Micro score'),
                bin=alt.Bin(extent=[0.75, 0.9], step=0.02)),
        y=alt.Y('count()'),
    )

    plots_dict['f1_score_random_forest.svg'] = bar_rf

    pipe_mlp = make_pipeline(
        preprocessor,
        MLPClassifier(alpha=0.05,
                      hidden_layer_sizes=(50, 100, 50),
                      learning_rate='adaptive',
                      max_iter=1000,
                      random_state=123))

    scores_mlp = cross_validate(pipe_mlp,
                                X_train,
                                y_train,
                                return_train_score=True,
                                scoring=scoring_metric,
                                n_jobs=-1,
                                cv=20)

    plot_mlp = pd.DataFrame(scores_mlp)
    bar_mlp = alt.Chart(plot_mlp).mark_bar().encode(
        x=alt.X('test_f1_micro',
                axis=alt.Axis(title='F1 Micro score'),
                bin=alt.Bin(maxbins=6)),
        y=alt.Y('count()'),
    )

    plots_dict['f1_score_mlp.svg'] = bar_mlp

    save_plots(out_dir, plots_dict)

    # -------------------------------------------------------------------
    # Hyperparameters Tuning
    # NOTE: despite its name, this pipeline wraps an MLPClassifier; the
    # 'mlpclassifier__' prefixes below are the step name that make_pipeline
    # derives from the estimator class.
    rf_pipeline = make_pipeline(preprocessor, MLPClassifier())

    param_dist = {
        'mlpclassifier__hidden_layer_sizes': [(50, 50, 50), (50, 100, 50),
                                              (100, )],
        'mlpclassifier__activation': ['tanh', 'relu'],
        'mlpclassifier__solver': ['sgd', 'adam'],
        'mlpclassifier__alpha': [0.0001, 0.05],
        'mlpclassifier__learning_rate': ['constant', 'adaptive'],
        # NOTE(review): 300 is listed twice - confirm whether that is intended.
        'mlpclassifier__max_iter': [300, 500, 450, 200, 300]
    }

    random_search = RandomizedSearchCV(rf_pipeline,
                                       param_distributions=param_dist,
                                       n_jobs=-1,
                                       n_iter=50,
                                       cv=5,
                                       scoring='f1_micro')
    random_search.fit(X_train, y_train)
    best_model_pipe = random_search.best_estimator_

    # Persist the best pipeline. The target directory is created up front
    # instead of retrying inside a bare ``except``, so an unrelated dump
    # failure is reported as itself rather than being masked by a second
    # error from os.makedirs.
    joblib_file = out_dir + "best_Model.pkl"
    model_dir = os.path.dirname(joblib_file)
    if model_dir:  # empty when the file goes to the current directory
        os.makedirs(model_dir, exist_ok=True)
    joblib.dump(best_model_pipe, joblib_file)