# Example no. 1
# 0
# Largest value found in the 'rental_started_at' column.
pdict['ms'] = max(df['rental_started_at'])

# Payloads stored under "<source name>.<attribute>" keys.
pdict['geo_source.geojson'] = geo_source.geojson
pdict['datapoints_source.data'] = dictionary
pdict['end_datapoints_source.data'] = dictionary2
pdict['source.data'] = {
    # Start and end coordinates of every row.
    'x': df['mrc_start_long'],
    'y': df['mrc_start_lat'],
    'x1': df['mrc_end_long'],
    'y1': df['mrc_end_lat'],
    # cx is the longitude midpoint; cy is the start latitude shifted by one
    # eighth of the haversine distance.
    'cx': (df['mrc_start_long'] + df['mrc_end_long']) / 2,
    'cy': df['mrc_start_lat'] + df['haversine_distance'] / 8,
}

# Text box for the hex filter value; it starts out hidden.
hex_filter_no = TextInput(value="", title="hex_filter_no")
hex_filter_no.visible = False
#hex_filter_threshold = TextInput(value="", title="hex_filter_threshold")

# Drop-down choosing the hex filter mode; defaults to showing all hexes.
hex_filter_select = Select(
    options=[
        "All Hexes",
        "Filter by Number",
        "Filter by Threshold",
    ],
    value='All Hexes',
    title='Hex Filters',
)


def hex_filter_callback(attr, old, new):
    if hex_filter_select.value == 'All Hexes':
        hex_filter_no.visible = False
    elif hex_filter_select.value == 'Filter by Number':
        hex_filter_no.visible = True
        hex_filter_no.title = "hex_filter by number"
    else:
# Example no. 2
# 0
def _create_ui_components():  # pylint: disable=too-many-statements
    """Build the lexicon UI widgets and register all of their callbacks.

    Creates the aspect/opinion lexicon tables, the three drop-down menus
    ("Edit Lexicons", "Extract Lexicons", "Classify"), a hidden status text
    box and the data sources the browser-side script (``dropdown.js``, read
    from this file's directory) uses to hand uploaded files to Python.

    Side effects: (re)binds the module-level names ``asp_table_source``,
    ``asp_filter_src``, ``op_table_source``, ``op_filter_src``, ``stats``,
    ``aspects``, ``tabs`` and ``lexicons_dropdown``. The registered callbacks
    additionally bind ``train_data``, ``infer_data``, ``aspects_data``,
    ``opinions_data``, ``df_opinion_generic``, ``plot`` and ``source``.

    Returns:
        The object produced by ``layout(...)``: one row with the header built
        by ``_create_header`` and one row with the tabs widget.
    """
    global asp_table_source, asp_filter_src, op_table_source, op_filter_src
    global stats, aspects, tabs, lexicons_dropdown
    stats = pd.DataFrame(columns=['Quantity', 'Score'])
    aspects = pd.Series([])

    def new_col_data_src():
        """Return an empty source with 'file_contents' / 'file_name' columns."""
        return ColumnDataSource({'file_contents': [], 'file_name': []})

    large_text = HTMLTemplateFormatter(template='''<div><%= value %></div>''')

    def data_column(title):
        """Return a table column whose field and (HTML) header are ``title``."""
        return TableColumn(field=title,
                           title='<span class="header">' + title + '</span>',
                           formatter=large_text)

    asp_table_columns = [
        data_column('Term'),
        data_column('Alias1'),
        data_column('Alias2'),
        data_column('Alias3')
    ]
    op_table_columns = [
        data_column('Term'),
        data_column('Score'),
        data_column('Polarity')
    ]

    asp_table_source = empty_table('Term', 'Alias1', 'Alias2', 'Alias3')
    asp_filter_src = empty_table('Term', 'Alias1', 'Alias2', 'Alias3')
    asp_src = new_col_data_src()

    # NOTE(review): 'Polarity' is passed twice here, while infer_file_callback
    # reads an 'isAcquired' column from op_filter_src -- confirm against
    # empty_table whether the fourth name was meant to be 'isAcquired'.
    op_table_source = empty_table('Term', 'Score', 'Polarity', 'Polarity')
    op_filter_src = empty_table('Term', 'Score', 'Polarity', 'Polarity')
    op_src = new_col_data_src()

    asp_table = DataTable(source=asp_table_source,
                          selectable='checkbox',
                          columns=asp_table_columns,
                          editable=True,
                          width=600,
                          height=500)
    op_table = DataTable(source=op_table_source,
                         selectable='checkbox',
                         columns=op_table_columns,
                         editable=True,
                         width=600,
                         height=500)

    asp_examples_box = _create_examples_table()
    op_examples_box = _create_examples_table()
    asp_layout = layout([[asp_table, asp_examples_box]])
    op_layout = layout([[op_table, op_examples_box]])
    asp_tab = Panel(child=asp_layout, title="Aspect Lexicon")
    op_tab = Panel(child=op_layout, title="Opinion Lexicon")
    tabs = Tabs(tabs=[asp_tab, op_tab], width=700, css_classes=['mytab'])

    lexicons_menu = [("Open", "open"), ("Save", "save")]
    lexicons_dropdown = Dropdown(label="Edit Lexicons",
                                 button_type="success",
                                 menu=lexicons_menu,
                                 width=140,
                                 height=31,
                                 css_classes=['mybutton'])

    train_menu = [("Parsed Data", "parsed"), ("Raw Data", "raw")]
    train_dropdown = Dropdown(label="Extract Lexicons",
                              button_type="success",
                              menu=train_menu,
                              width=162,
                              height=31,
                              css_classes=['mybutton'])

    inference_menu = [("Parsed Data", "parsed"), ("Raw Data", "raw")]
    inference_dropdown = Dropdown(label="Classify",
                                  button_type="success",
                                  menu=inference_menu,
                                  width=140,
                                  height=31,
                                  css_classes=['mybutton'])

    text_status = TextInput(value="Select training data",
                            title="Train Run Status:",
                            css_classes=['statusText'])
    text_status.visible = False

    train_src = new_col_data_src()
    infer_src = new_col_data_src()

    # The same browser-side script serves all three drop-downs.
    with open(join(dirname(__file__), "dropdown.js")) as f:
        code = f.read()

    args = dict(clicked=lexicons_dropdown,
                asp_filter=asp_filter_src,
                op_filter=op_filter_src,
                asp_src=asp_src,
                op_src=op_src,
                tabs=tabs,
                text_status=text_status,
                train_src=train_src,
                infer_src=infer_src,
                train_clicked=train_dropdown,
                infer_clicked=inference_dropdown,
                opinion_lex_generic="")

    # ``args`` is mutated between the registrations below, so their order is
    # significant and is kept exactly as it was.
    args['train_clicked'] = train_dropdown
    train_dropdown.js_on_change('value', CustomJS(args=args, code=code))

    # NOTE(review): this overrides 'train_clicked' (not 'infer_clicked') with
    # the inference drop-down. dropdown.js is not visible from here, so the
    # statement is left untouched -- confirm it is intended.
    args['train_clicked'] = inference_dropdown
    inference_dropdown.js_on_change('value', CustomJS(args=args, code=code))

    args['clicked'] = lexicons_dropdown
    lexicons_dropdown.js_on_change('value', CustomJS(args=args, code=code))

    def update_filter_source(table_source, filter_source):
        """Copy the currently selected table rows into ``filter_source``.

        The first column of ``table_source.to_df()`` is dropped (presumably
        the index column added by the conversion -- verify).
        """
        df = table_source.to_df()
        sel_inx = sorted(table_source.selected.indices)
        df = df.iloc[sel_inx, 1:]
        new_source = ColumnDataSource(df)
        filter_source.data = new_source.data

    def update_examples_box(data, examples_box, old, new):
        """Show columns 4..23 of the row whose selection state just toggled.

        The box is always cleared first. It is then filled only when exactly
        one row changed state; a de-selected row takes precedence over a
        newly selected one. Nothing is shown when several rows changed at
        once or when ``old`` and ``new`` contain the same indices.
        """
        examples_box.source.data = {'Examples': []}
        unselected = list(set(old) - set(new))
        selected = list(set(new) - set(old))
        if len(selected) > 1 or len(unselected) > 1:
            return
        toggled = unselected or selected
        if not toggled:
            # Same set of indices before and after: there is no row to show
            # (indexing ``selected[0]`` here used to raise IndexError).
            return
        row = toggled[0]
        # Never read past the last available column.
        last_col = min(24, data.shape[1])
        examples_box.source.data.update({
            'Examples': [str(data.iloc[row, i]) for i in range(4, last_col)]
        })

    def asp_selected_change(_, old, new):
        """React to a selection change in the aspect table."""
        global asp_filter_src, asp_table_source, aspects_data
        update_filter_source(asp_table_source, asp_filter_src)
        update_examples_box(aspects_data, asp_examples_box, old, new)

    def op_selected_change(_, old, new):
        """React to a selection change in the opinion table."""
        global op_filter_src, op_table_source, opinions_data
        update_filter_source(op_table_source, op_filter_src)
        update_examples_box(opinions_data, op_examples_box, old, new)

    def strip_js_prefix(raw_contents):
        """Return the text after the first comma, or all of it if none.

        Uploaded contents arrive as ``<prefix added by JS>,<base64 data>``.
        """
        _, sep, payload = raw_contents.partition(",")
        return payload if sep else raw_contents

    def read_csv(file_src, headers=False, index_cols=False, readCSV=True):
        """Decode the first uploaded file of ``file_src`` into a DataFrame.

        When ``readCSV`` is false, ``file_src`` is returned unchanged.
        ``headers`` selects whether the first row is a header row and
        ``index_cols`` is forwarded to pandas as ``index_col``.
        """
        if not readCSV:
            return file_src
        raw_contents = file_src.data['file_contents'][0]
        file_contents = base64.b64decode(strip_js_prefix(raw_contents))
        return pd.read_csv(io.BytesIO(file_contents),
                           encoding="ISO-8859-1",
                           keep_default_na=False,
                           na_values={None},
                           engine='python',
                           index_col=index_cols,
                           header=0 if headers else None)

    def read_parsed_files(file_content, file_name):
        """Decode one uploaded JSON file and store it under SENTIMENT_OUT.

        Best effort: any failure is printed and the file is skipped.
        """
        try:
            decoded = base64.b64decode(strip_js_prefix(file_content))
            # Parse before opening the target so that a malformed upload
            # does not leave an empty/truncated file behind.
            data_dict = json.loads(decoded.decode("utf-8"))
            with open(SENTIMENT_OUT / file_name, 'w') as json_file:
                json.dump(data_dict, json_file)
        except Exception as e:  # pylint: disable=broad-except
            print(str(e))

    def store_parsed_upload(file_src):
        """Write every uploaded parsed file to disk; return their directory.

        The directory is SENTIMENT_OUT joined with the first path component
        of the first uploaded file name.
        """
        f_contents = file_src.data['file_contents']
        f_names = file_src.data['file_name']
        raw_data_path = SENTIMENT_OUT / f_names[0].split('/')[0]
        if not os.path.exists(raw_data_path):
            os.makedirs(raw_data_path)
        for f_content, f_name in zip(f_contents, f_names):
            read_parsed_files(f_content, f_name)
        return raw_data_path

    def csv_as_upload(csv_path):
        """Return ``csv_path``'s text packed like an uploaded file."""
        with io.open(csv_path, "r") as fp:
            csv_text = fp.read()
        file_data = base64.b64encode(str.encode(csv_text)).decode("utf-8")
        return {
            'file_contents': [file_data],
            'file_name': ['nameFile.csv']
        }

    # pylint: disable=unused-argument
    def train_file_callback(attr, old, new):
        """Run lexicon extraction on the uploaded training data.

        A single uploaded file is treated as raw CSV data; several files are
        treated as parsed data. Afterwards the generated aspect and opinion
        lexicons are pushed into ``asp_src`` / ``op_src``.
        """
        global train_data
        if not train_src.data['file_contents']:
            # Nothing was uploaded, so there is nothing to train on.
            return
        SENTIMENT_OUT.mkdir(parents=True, exist_ok=True)
        train = TrainSentiment(parse=True, rerank_model=None)
        if len(train_src.data['file_contents']) == 1:
            train_data = read_csv(train_src, index_cols=0)
            file_name = train_src.data['file_name'][0]
            raw_data_path = SENTIMENT_OUT / file_name
            train_data.to_csv(raw_data_path, header=False)
            print('Running_SentimentTraining on data...')
            train.run(data=raw_data_path)
        else:
            raw_data_path = store_parsed_upload(train_src)
            print('Running_SentimentTraining on data...')
            train.run(parsed_data=raw_data_path)

        text_status.value = "Lexicon extraction completed"

        asp_src.data = csv_as_upload(AcquireTerms.acquired_aspect_terms_path)
        op_src.data = csv_as_upload(
            RerankTerms.out_dir / 'generated_opinion_lex_reranked.csv')

    def show_analysis() -> None:
        """Create the plot and events table and show them in a new tab."""
        global stats, aspects, plot, source, tabs
        plot, source = _create_plot()
        events_table = _create_events_table()

        # pylint: disable=unused-argument
        def _events_handler(attr, old, new):
            _update_events(events_table, events_type.active)

        # Toggle display of in-domain / All aspect mentions
        events_type = RadioButtonGroup(
            labels=['All Events', 'In-Domain Events'], active=0)

        analysis_layout = layout([[plot], [events_table]])

        # events_type display toggle disabled
        # analysis_layout = layout([[plot],[events_type],[events_table]])

        analysis_tab = Panel(child=analysis_layout, title="Analysis")
        # NOTE(review): every call inserts another "Analysis" tab at index 2;
        # earlier ones are not removed -- confirm this is intended.
        tabs.tabs.insert(2, analysis_tab)
        tabs.active = 2
        events_type.on_change('active', _events_handler)
        source.selected.on_change('indices', _events_handler)  # pylint: disable=no-member

    # pylint: disable=unused-argument
    def infer_file_callback(attr, old, new):
        """Classify the uploaded data with the lexicon rows now selected.

        Writes the selected aspect/opinion rows to ``aspects.csv`` and
        ``opinions.csv`` under SENTIMENT_OUT, runs the solution on the
        upload (one file: raw CSV, several files: parsed data) and opens the
        analysis tab.
        """
        # run inference on input data and current aspect/opinion lexicons in view
        global infer_data, stats, aspects

        if not infer_src.data['file_contents']:
            # Nothing was uploaded, so there is nothing to classify.
            return

        SENTIMENT_OUT.mkdir(parents=True, exist_ok=True)
        aspect_lex_path = SENTIMENT_OUT / 'aspects.csv'
        opinion_lex_path = SENTIMENT_OUT / 'opinions.csv'

        df_aspect = pd.DataFrame.from_dict(asp_filter_src.data)
        aspect_col_list = ['Term', 'Alias1', 'Alias2', 'Alias3']
        df_aspect = df_aspect[aspect_col_list]
        df_aspect.to_csv(aspect_lex_path, index=False, na_rep="NaN")

        df_opinion = pd.DataFrame.from_dict(op_filter_src.data)
        opinion_col_list = ['Term', 'Score', 'Polarity', 'isAcquired']
        df_opinion = df_opinion[opinion_col_list]
        df_opinion.to_csv(opinion_lex_path, index=False, na_rep="NaN")

        solution = SentimentSolution()

        if len(infer_src.data['file_contents']) == 1:
            infer_data = read_csv(infer_src, index_cols=0)
            file_name = infer_src.data['file_name'][0]
            raw_data_path = SENTIMENT_OUT / file_name
            infer_data.to_csv(raw_data_path, header=False)
            print('Running_SentimentInference on data...')
            text_status.value = "Running classification on data..."
            stats = solution.run(data=raw_data_path,
                                 aspect_lex=aspect_lex_path,
                                 opinion_lex=opinion_lex_path)
        else:
            raw_data_path = store_parsed_upload(infer_src)
            print('Running_SentimentInference on data...')
            text_status.value = "Running classification on data..."
            stats = solution.run(parsed_data=raw_data_path,
                                 aspect_lex=aspect_lex_path,
                                 opinion_lex=opinion_lex_path)

        aspects = pd.read_csv(aspect_lex_path, encoding='utf-8')['Term']
        text_status.value = "Classification completed"
        show_analysis()

    # pylint: disable=unused-argument
    def asp_file_callback(attr, old, new):
        """Load the uploaded aspect lexicon into the table, all rows selected."""
        global aspects_data, asp_table_source
        aspects_data = read_csv(asp_src, headers=True)
        # Replaces None values by empty string
        aspects_data = aspects_data.fillna('')
        new_source = ColumnDataSource(aspects_data)
        asp_table_source.data = new_source.data
        asp_table_source.selected.indices = list(range(len(aspects_data)))

    # pylint: disable=unused-argument
    def op_file_callback(attr, old, new):
        """Load the uploaded opinion lexicon.

        Rows with ``isAcquired == 'Y'`` go into the table (all selected);
        rows with ``isAcquired == 'N'`` are handed to the lexicons drop-down
        callback as its ``opinion_lex_generic`` argument.
        """
        global opinions_data, op_table_source, lexicons_dropdown, df_opinion_generic
        df = read_csv(op_src, headers=True)
        # Replaces None values by empty string
        df = df.fillna('')
        # Placeholder for generic opinion lexicons from the given csv file
        df_opinion_generic = df[df['isAcquired'] == 'N']
        # Update the argument value for the callback customJS
        lexicons_dropdown.js_property_callbacks.get(
            'change:value')[0].args['opinion_lex_generic'] \
            = df_opinion_generic.to_dict(orient='list')
        opinions_data = df[df['isAcquired'] == 'Y']
        new_source = ColumnDataSource(opinions_data)
        op_table_source.data = new_source.data
        op_table_source.selected.indices = list(range(len(opinions_data)))

    # pylint: disable=unused-argument
    def txt_status_callback(attr, old, new):
        """Print the previous and the updated status text."""
        print("Previous label: " + old)
        print("Updated label: " + new)

    text_status.on_change("value", txt_status_callback)

    asp_src.on_change('data', asp_file_callback)
    # pylint: disable=no-member
    asp_table_source.selected.on_change('indices', asp_selected_change)

    op_src.on_change('data', op_file_callback)
    op_table_source.selected.on_change('indices', op_selected_change)  # pylint: disable=no-member

    train_src.on_change('data', train_file_callback)
    infer_src.on_change('data', infer_file_callback)

    return layout(
        [[_create_header(train_dropdown, inference_dropdown, text_status)],
         [tabs]])