def create_timeseries_celebrities(start_time):
    """
    :return: json file to get timeseries for celebrities
    """
    data = pandas.DataFrame(hour_status)
    celebrity_names = [
        celeb_first[ix] + " " + celeb_last[ix]
        for ix in range(len(celeb_first))
    ]
    data.columns = celebrity_names

    timestamp_rows = []

    for i in range(len(hour_status)):
        time = start_time + i * 3600
        timestamp_rows.append(datetime.datetime.fromtimestamp(time))

    idx = pandas.DatetimeIndex(timestamp_rows)
    data = data.set_index(idx)

    match_data = dict(data)  # one Series per celebrity column
    all_matches = pandas.DataFrame(match_data)
    all_matches[all_matches < 0] = 0

    # plotting
    time_chart = vincent.Line(all_matches[470:])
    time_chart.axis_titles(x='Time in hours', y='Tweet Count')
    time_chart.legend(title='Celebrities')
    time_chart.to_json('../Graphs/Question 6/time_chart_celeb.json')

    return all_matches
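The function above reads module-level globals. A minimal, hypothetical setup and call; every name, shape, and value below is an assumption, not part of the original module:

# Hypothetical setup: hour_status, celeb_first and celeb_last are assumed
# module-level globals (480 hourly rows, one column per celebrity).
import random
import time

celeb_first = ["Lionel", "Cristiano"]   # assumed first names
celeb_last = ["Messi", "Ronaldo"]       # assumed last names
hour_status = [[random.randint(0, 50) for _ in celeb_first] for _ in range(480)]

# start 480 hours in the past; ../Graphs/Question 6/ must already exist
start_time = int(time.time()) - len(hour_status) * 3600
all_matches = create_timeseries_celebrities(start_time)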
Example #2
def create_timeseries_topics(start_time):
    """
    Build normalized hourly series for each topic and write a Vega JSON
    time-series chart.
    """
    # scale each topic's hourly counts to [0, 1] by its own peak
    advertisements = np.array(ads_hour_count) / float(max(ads_hour_count))
    celebrities = np.array(celeb_hour_count) / float(max(celeb_hour_count))
    goals = np.array(goal_hour_count) / float(max(goal_hour_count))
    teams = np.array(team_hour_count) / float(max(team_hour_count))

    data = pandas.DataFrame({
        "Advertisements": advertisements,
        "Celebrities": celebrities,
        "Goal Chatter": goals,
        "Team Chatter": teams
    })
    data[data < 0] = 0

    timestamp_rows = []

    for i in range(len(goal_hour_count)):
        time = start_time + i * 3600
        timestamp_rows.append(datetime.datetime.fromtimestamp(time))

    idx = pandas.DatetimeIndex(timestamp_rows)
    data = data.set_index(idx)

    # data converted into a dictionary
    match_data = dict(data)
    # Dataframe for multiple series
    all_matches = pandas.DataFrame(match_data)

    # plotting
    time_chart = vincent.Line(all_matches[470:])
    time_chart.axis_titles(x='Time in hours', y='Tweet Count')
    time_chart.legend(title='Topic Modelling')
    time_chart.to_json('../Graphs/Question 6/time_chart_topics.json')
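Each topic series above is scaled into [0, 1] by dividing by its own peak; a tiny worked illustration of that step in isolation:

import numpy as np

counts = [2, 5, 10]
print(np.array(counts) / float(max(counts)))  # -> [0.2 0.5 1. ]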
Example #3
def api_vendas_dia():
    import pandas as pd
    resultado = mongo.db.notas_fiscais.aggregate([{
        "$group": {
            "_id": "$nfeProc.NFe.infNFe.ide.dEmi",
            "total": {
                "$sum": "$nfeProc.NFe.infNFe.total.ICMSTot.vNF"
            }
        }
    }, {
        "$sort": {
            "_id": -1
        }
    }, {
        "$limit": 20
    }])

    resultado = resultado['result']  # pymongo < 3.0: aggregate() returned a dict
    resultado = pd.DataFrame.from_records(resultado, index="_id")
    line = vincent.Line(resultado, width=540, height=380)
    line.axis_titles(x='', y='Valor vendas')
    line.y_axis_properties(label_align="right",
                           title_offset=-40,
                           title_size=14)
    return line.to_json()
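The resultado['result'] lookup matches pymongo < 3.0, where aggregate() returned a dict. A hedged sketch of the same route on pymongo >= 3.0, where aggregate() returns a cursor (mongo is assumed to be the same Flask-PyMongo handle):

import pandas as pd
import vincent

def api_vendas_dia_pymongo3():
    # same pipeline as above; aggregate() now yields a CommandCursor
    cursor = mongo.db.notas_fiscais.aggregate([
        {"$group": {"_id": "$nfeProc.NFe.infNFe.ide.dEmi",
                    "total": {"$sum": "$nfeProc.NFe.infNFe.total.ICMSTot.vNF"}}},
        {"$sort": {"_id": -1}},
        {"$limit": 20},
    ])
    resultado = pd.DataFrame.from_records(list(cursor), index="_id")
    line = vincent.Line(resultado, width=540, height=380)
    line.axis_titles(x='', y='Valor vendas')
    return line.to_json()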
def create_timeseries_ads(start_time):
    """
    Build a normalized hourly series per advertisement, write a Vega JSON
    chart, and return the cleaned DataFrame.
    """
    df = pandas.DataFrame(hour_status)
    df.columns = graph_ads

    data = (df - df.mean()) / (df.max() - df.min())  # mean normalization
    data[data < 0] = 0

    timestamp_rows = []

    for i in range(len(hour_status)):
        time = start_time + i * 3600
        timestamp_rows.append(datetime.datetime.fromtimestamp(time))

    idx = pandas.DatetimeIndex(timestamp_rows)
    data = data.set_index(idx)

    match_data = dict(data)  # one Series per advertisement column
    all_matches = pandas.DataFrame(match_data)
    all_matches[all_matches < 0] = 0
    # NaN never compares equal, so `== np.NaN` matches nothing;
    # use all_matches.fillna(0) instead

    # plotting the time-series
    time_chart = vincent.Line(all_matches[470:])
    time_chart.axis_titles(x='Time in hours', y='Tweet Count')
    time_chart.legend(title='Advertisement Names')
    time_chart.to_json('../Graphs/Question 6/time_chart_ads.json')

    return all_matches
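Note that the scaling above is mean normalization ((x - mean) / range), not min-max scaling; a toy illustration:

import pandas as pd

df = pd.DataFrame({'ad': [0, 5, 10]})
print((df - df.mean()) / (df.max() - df.min()))  # -> -0.5, 0.0, 0.5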
    def make_line_chart_popup(data_row: pd.Series, title: str) -> folium.Popup:
        '''Create a line-chart popup from a temporal Series for departements.
        The Series index must follow the {year}_median, {year}_decile_1,
        {year}_decile_9, {year+1}_median, ... naming used by the filters below.
        The popup can then be added to map layers.'''
        # filter on index names and build three columns from the one Series
        data = {
                'decile_1': data_row.filter(regex=".*decile_1$").values,
                'decile_9': data_row.filter(regex=".*decile_9$").values,
                'median': data_row.filter(like="median").values,
                }
        df_to_display = pd.DataFrame.from_dict(data)
        data_row = data_row.drop("color")

        # build the DataFrame index from the years in the original data_row index
        df_to_display.index = pd.to_datetime(list(dict.fromkeys([int(annee_c[:4]) for annee_c in data_row.index.tolist()])), format="%Y")

        line_chart = vincent.Line(df_to_display,
                                width=300,
                                height=200)
        line_chart.axis_titles(x='Année', y='prix m2')
        line_chart.legend(title=title)

        popup = folium.Popup()
        folium.Vega(line_chart, width = 400, height=250).add_to(popup)
        return popup
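A hypothetical usage: build a Series in the expected index format and attach the popup to a marker. The values, coordinates, and title are illustrative assumptions:

import folium
import pandas as pd

data_row = pd.Series({
    "2019_median": 2500, "2019_decile_1": 1800, "2019_decile_9": 3400,
    "2020_median": 2600, "2020_decile_1": 1900, "2020_decile_9": 3600,
    "color": "#ff0000",  # present only because the function drops it
})

m = folium.Map(location=[48.85, 2.35], zoom_start=6)
popup = make_line_chart_popup(data_row, title="Paris")
folium.Marker([48.85, 2.35], popup=popup).add_to(m)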
Example #6
def data_multiline():
    period = request.args.get('period', 9)
    print(period)
    print(request.args)
    data = get_data(float(period))
    return vincent.Line(data, width=WIDTH, height=HEIGHT,
                        iter_idx='x').to_json()
Example #7
def stocks():
    line = vincent.Line(data.price[['MSFT', 'AAPL']],
                        width=WIDTH,
                        height=HEIGHT)
    line.axis_titles(x='Date', y='Price')
    line.legend(title='MSFT vs AAPL')
    return line.to_json()
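data.price here is assumed to be a module-level DataFrame of adjusted closes keyed by ticker, as in vincent's examples. A stand-in with synthetic prices for local testing:

from types import SimpleNamespace
import numpy as np
import pandas as pd

idx = pd.date_range('2012-01-01', periods=250, freq='B')
data = SimpleNamespace(price=pd.DataFrame({
    'MSFT': 26 + np.random.randn(250).cumsum(),
    'AAPL': 60 + np.random.randn(250).cumsum(),
}, index=idx))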
Example #8
def main5(fn):
    with open(fn + '.json', 'r') as f:
        count_all = Counter()
        dates = []
        search_hashes = '#WWAT'

        for line in f:
            tweet = json.loads(line)
            tokens = preprocess(tweet['text'])

            terms = terms_single(terms_hash(tokens) + getHashtags(tweet))

            count_all.update(terms)

            time = getTime(tweet)
            if time is not None and search_hashes in terms:
                dates.append(time)

    print(count_all.most_common(10))

    per_day = time_series(dates, '1D')  # daily buckets, despite the old name
    per_day.to_csv(fn + '.series.csv', sep='\t', encoding='utf-8')

    # and now the plotting
    time_chart = vincent.Line(per_day)
    time_chart.axis_titles(x='Time', y='Freq')
    time_chart.to_json(fn + '.count.time_chart.json')
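main5 leans on helpers defined elsewhere (preprocess, terms_hash, terms_single, getHashtags, getTime, time_series). A plausible sketch of time_series, inferred from how it is called here and in main2/main3 below:

import pandas

def time_series(dates, rule='1Min', counts=None):
    # assumed helper: bucket a list of timestamps into resampled counts;
    # counts defaults to one per date, matching the calls in this listing
    values = counts if counts is not None else [1] * len(dates)
    idx = pandas.DatetimeIndex(dates)
    return pandas.Series(values, index=idx).resample(rule).sum().fillna(0)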
Example #9
    def test_to_json(self):
        '''Test json output

        This tests that files are written with the correct names, not that
        the json was serialized correctly.'''
        line = vincent.Line()
        line.tabular_data([1, 2, 3, 4, 5])
        from mock import call, patch, MagicMock

        # Python 2 only: on Python 3, patch 'builtins.open' and drop spec=file
        with patch('__builtin__.open', create=True) as mock_open:
            mock_open.return_value = MagicMock(spec=file)

            path = 'test.json'
            data_path = 'test_data.json'
            default_data_path = 'data.json'
            html_path = 'test.html'
            default_html_path = 'vega_template.html'

            # No data splitting / html
            kwargs_default_behavior = [{}, {
                'split_data': False
            }, {
                'html': False
            }, {
                'data_path': data_path
            }, {
                'html_path': html_path
            }]
            for kwargs in kwargs_default_behavior:
                line.to_json(path, **kwargs)
                mock_open.assert_called_once_with(path, 'w')
                mock_open.reset_mock()

            line.to_json(path, split_data=True)
            mock_open.assert_has_calls(
                [call(path, 'w'),
                 call(default_data_path, 'w')],
                any_order=True)
            mock_open.reset_mock()

            line.to_json(path, split_data=True, data_path=data_path)
            mock_open.assert_has_calls(
                [call(path, 'w'), call(data_path, 'w')], any_order=True)
            mock_open.reset_mock()

            # The HTML option reads a default file that needs a real return
            # value for the template substitution.
            mock_open.return_value.read.return_value = '$path'

            line.to_json(path, html=True)
            mock_open.assert_has_calls(
                [call(path, 'w'),
                 call(default_html_path, 'w')],
                any_order=True)
            mock_open.reset_mock()

            line.to_json(path, html=True, html_path=html_path)
            mock_open.assert_has_calls(
                [call(path, 'w'), call(html_path, 'w')], any_order=True)
            mock_open.reset_mock()
def route_popup(schedule, route_id):
    route_df = make_route_df(schedule, route_id)
    vega = vincent.Line(vincent.Data.from_pandas(route_df))
    popup = Vega(vega.to_json(),
                 width=vega.width + 50,
                 height=vega.height + 50)

    return popup
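A hypothetical usage of route_popup on a folium map; schedule and the route id come from the surrounding transit code and are assumptions here:

import folium

m = folium.Map(location=[40.7, -74.0], zoom_start=11)  # assumed coordinates
vega_popup = route_popup(schedule, route_id='A1')       # hypothetical route id
folium.Marker([40.7, -74.0],
              popup=folium.Popup().add_child(vega_popup)).add_to(m)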
Example #11
def plot_time_series(fname: str,
                     export_fname: str,
                     num_top_terms: int,
                     rule: str,
                     term_to_analyze_fname: str,
                     export_fname_for_trending_terms: str,
                     export_fname_for_non_trending_terms: str):

    terms_to_analyze = read_all_important_terms(term_to_analyze_fname)
    terms_date, count_all = analyze(fname, terms_to_analyze=terms_to_analyze)

    idx_list = []
    keys = []
    match_data = {}

    for term_freq_tuple in count_all.most_common(num_top_terms):
        key = term_freq_tuple[0]
        if key != '':
            value = terms_date[term_freq_tuple[0]]
            keys.append(key)
            ones = [1] * len(value)
            idx = pandas.DatetimeIndex(value)
            term_time_series = pandas.Series(ones, index=idx)

            # Resampling / bucketing
            time_bin = term_time_series.resample(rule).sum().fillna(0)
            match_data[key] = time_bin
            idx_list.append(idx)

    all_matches = pandas.DataFrame(data=match_data,
                                   index=idx_list[0])
    # Re-sample all of the added series together
    all_matches = all_matches.resample(rule).sum().fillna(0)

    print("Terms plotted:")
    print(keys)

    time_chart = vincent.Line(all_matches[keys], width=1150, height=580)
    time_chart.axis_titles(x='Time', y='Freq')
    time_chart.legend(title='Term Timeseries')
    time_chart.to_json(export_fname)

    print("Term-Timeseries file exported at [%s]." % export_fname)

    export_terms(terms_date, count_all, keys,
                 export_fname_trending=export_fname_for_trending_terms,
                 export_fname_non_trending=export_fname_for_non_trending_terms)
    print("Terms with top trending terms are exported at [%s]" % export_fname_for_trending_terms)
    print("Terms without top trending terms are exported at [%s]" % export_fname_for_non_trending_terms)
def time_visualization():
    # fname, dates, preprocess and the per_minute_* series are assumed to
    # be defined at module level
    with open(fname, 'r') as f:
        for line in f:
            tweet = json.loads(line)
            # let's focus on hashtags only at the moment
            terms_hash = [
                term for term in preprocess(tweet['text'])
                if term.startswith('#')
            ]
            # track when the hashtag is mentioned
            if '#kaplansba' in terms_hash:
                dates.append(tweet['created_at'])

    # a list of "1" to count the hashtags
    ones = [1] * len(dates)
    # the index of the series
    idx = pandas.DatetimeIndex(dates)
    # the actual series (at series of 1s for the moment)
    my_dates = pandas.Series(ones, index=idx)

    # the series is resampled per minute
    per_minute = my_dates.resample('1Min').sum().fillna(0)

    time_chart = vincent.Line(per_minute)
    time_chart.axis_titles(x='Time', y='Freq')
    time_chart.to_json('time_chart.json')

    # all the data together
    match_data = dict(kaplansba=per_minute_i,
                      amazonstudent=per_minute_s,
                      hello=per_minute_e)
    # we need a DataFrame, to accommodate multiple series
    all_matches = pandas.DataFrame(data=match_data, index=per_minute_i.index)
    # Resampling as above
    all_matches = all_matches.resample('1Min').sum().fillna(0)

    # and now the plotting
    time_chart = vincent.Line(
        all_matches[['kaplansba', 'amazonstudent', 'hello']])
    time_chart.axis_titles(x='Time', y='Freq')
    time_chart.legend(title='Matches')
    time_chart.to_json('time_chart.json')
Example #13
def show_acce(map_data_p, lat0_p, lng0_p, vehicleplate_number_p, num1_p,
              acceleration_data_p, lat_p, lng_p, location_time_p,
              file_out_path_p):
    m = folium.Map([lat0_p, lng0_p], zoom_start=8)

    m.add_child(folium.LatLngPopup())  # show latitude/longitude on click

    route = folium.PolyLine(  # connect the GPS points with line segments
        map_data_p,
        weight=3,      # line width
        color='blue',  # line color
        opacity=0.8    # line opacity
    ).add_to(m)  # add the polyline to map m

    for i in range(num1_p):
        if i != 0 and i % 59 == 0:
            y_data = [acceleration_data_p[j] for j in range(i - 59, i + 1)]
            vis = vincent.Line(y_data, width=320, height=150)
            vis.axis_titles(x='Acceleration from ' + location_time_p[i - 59] +
                            ' to ' + location_time_p[i],
                            y='unit: m/s^2')
            vis_json = vis.to_json()
            tooltip = location_time_p[i - 59] + ' to ' + location_time_p[i]

            status = 0
            for k in range(i - 59, i + 1):
                # flag harsh acceleration/braking (|a| > 3 m/s^2)
                if acceleration_data_p[k] > 3 or acceleration_data_p[k] < -3:
                    status = 1

            if status == 1:
                # harsh acceleration in this window: red marker whose popup
                # shows the acceleration line chart for the last 60 records
                folium.Marker(
                    location=[lat_p[i], lng_p[i]],
                    popup=folium.Popup(max_width=3250).add_child(
                        folium.Vega(vis_json, width=380, height=200)),
                    icon=folium.Icon(color='red', icon='info-sign'),
                    tooltip=tooltip).add_to(m)
            else:
                # normal window: default marker with the same popup chart
                folium.Marker(
                    location=[lat_p[i], lng_p[i]],
                    popup=folium.Popup(max_width=3250).add_child(
                        folium.Vega(vis_json, width=380, height=200)),
                    tooltip=tooltip).add_to(m)

    html_path = os.path.join('r', file_out_path_p,
                             vehicleplate_number_p + '_acceleration.html')
    m.save(html_path)  # save the result as an HTML file
    webbrowser.open(html_path, new=1)
def time_plot(fname, search_word, classified):
    # Python 2 code: str.translate(None, ...) below requires Python 2 strings
    ext = fname.split('.')[-1]  # file extension
    stop = stopwords.words('english')
    punctuation = string.punctuation.replace('#', '')
    with open(fname, 'r') as FILE:
        dates = []
        if not ext == 'json':
            next(FILE)
            for line in FILE:
                values = line.split(';')
                text = C.unicode_clean(values[len(values) - 1])
                text = text.translate(None, punctuation).strip()
                text = text.replace('RT ', '')
                terms = [
                    term for term in text.lower().split() if term not in stop
                ]
                if search_word.lower() in terms:
                    dates.append(values[1])
        elif ext == 'json':
            for line in FILE:
                information = json.loads(line)
                text = information['text'].encode('unicode_escape')
                text = C.unicode_clean(text)
                text = text.translate(None, punctuation).strip()
                text = text.replace('RT ', '')
                terms = [
                    term for term in text.lower().split() if term not in stop
                ]
                if search_word.lower() in terms:
                    dates.append(information['created_at'])
    # A list of "1" to count the terms
    ones = [1] * len(dates)
    # The index of the series
    idx = pandas.DatetimeIndex(dates)
    # Resampling / bucketing
    bar_time = pandas.Series(ones, index=idx)
    bar_time = bar_time.resample(classified).sum().fillna(0)
    # Creating the Chart
    time_chart = vincent.Line(bar_time)
    time_chart.axis_titles(x='Time', y='Freq')
    if search_word[0] == '#':
        hashtag = search_word[1:]
        time_chart.legend(title='#%s' % hashtag)
        time_chart.to_json('Time_hash_%s.json' % hashtag,
                           html_out=True,
                           html_path='Time_hash_%s.html' % hashtag)
    else:
        time_chart.legend(title='%s' % search_word)
        time_chart.to_json('Time_%s.json' % search_word,
                           html_out=True,
                           html_path='Time_%s.html' % search_word)
Example #15
def graphLine(estacion, idioma):
    models.dbToCsv(estacion, idioma)
    tweets = pd.read_csv('static/tweets.csv')
    tweets['created_at'] = pd.to_datetime(tweets['created_at'])

    tweets.set_index('created_at', drop=False, inplace=True)

    tweets_pm = tweets['created_at'].resample('M').count()

    # vincent.core.initialize_notebook()
    line = vincent.Line(tweets_pm)
    line.axis_titles(x='Meses', y='Nº Tweets')
    line.colors(brew='Spectral')
    line.to_json('static/area.json')
    return 0
Example #16
    def draw_line(self, data, coordinates, style, label, mplobj=None):
        import vincent  # only import if VincentRenderer is used
        if coordinates != 'data':
            warnings.warn("Only data coordinates supported. Skipping this")
        linedata = {'x': data[:, 0],
                    'y': data[:, 1]}
        line = vincent.Line(linedata, iter_idx='x',
                            width=self.figwidth, height=self.figheight)

        # TODO: respect the other style settings
        line.scales['color'].range = [style['color']]

        if self.chart is None:
            self.chart = line
        else:
            warnings.warn("Multiple plot elements not yet supported")
def build_json_plot(df, prd, state):
    # filter rows for the requested state
    df_r = df[df['LocationAbbr'] == state].set_index('Year')
    dic = pd.concat([prd[[state]], df_r], axis=1)
    dic.index = np.arange(dic.shape[0])
    dic.columns = ['Prediction %s' % state, 'LocationAbbr', 'True Value'
                   ] + list(dic.columns[3:])

    line = vincent.Line(dic[['True Value',
                             'Prediction %s' % state]],
                        columns=['True Value',
                                 'Prediction %s' % state],
                        key_on='idx')
    line.axis_titles(x='Year', y='Diabetes in ' + state)
    line.legend(title='Diabetes')
    line.height = 200
    line.width = 200
    return (line.grammar())
Example #18
def render_stats(url, stats, method):
    import vincent

    txt = ''
    if 'calls' in stats:
        calls = stats['calls']
        data = [x['duration'] for x in calls]

        if data:
            line = vincent.Line(data)
            # `x` leaked from the comprehension above; use the url argument
            line.axis_titles(x='%s %s' % (method, url), y='Duration')
            filepath = STATIC_PATH.join("assets", "%s_%s.json" % (method, url))
            line.to_json(str(filepath))

        txt = """
    <div id="vis_%(url)s_%(method)s" class="vis"></div>

    """ % locals()
    return txt
Example #19
def createTSMap(pos, timeSeries, zoom_start=4):
    # pos is assumed to expose .items() yielding a (lat, lon) pair
    fmap = folium.Map(location=pos.items(), zoom_start=zoom_start, crs='EPSG4326')

    df = timeSeries
    df.index = df.index.values.astype('M8[D]')  # truncate timestamps to whole days
    chart = vincent.Line(df[['evi', 'ndvi']], width=300, height=150)
    chart.legend(title='')
    chart.axis_titles(x='dates', y='')

    popup = folium.Popup(max_width=400)
    folium.Vega(chart.to_json(), height=200, width=450).add_to(popup)
    folium.Marker(pos.items(), popup=popup,
                  icon=folium.Icon(color='green', icon='info-sign')).add_to(fmap)

    wms = folium.features.WmsTileLayer('https://neo.sci.gsfc.nasa.gov/wms/wms',
                                       name='MODIS Data',
                                       format='image/png',
                                       layers='MOD13A2_M_NDVI')
    wms.add_to(fmap)
    return fmap
def create_data_plots_map():
    json_files = {}
    dic = {}
    for name in cases.index:
        coord = get_coordinates(name)
        dic[name] = coord
        df = cases_pT_new.T[name].to_frame(name='cases')
        df['deaths*10'] = deaths_pT_new.T[name] * 10
        line = v.Line(df.rolling(7, center=True, min_periods=1).mean())  # 7-day centered rolling mean
        line.axis_titles(x='Date', y='per 100k inhabitants')
        line.legend(name)
        line.width = 350
        line.height = 150
        json_files[name] = str(line.to_json())
    df = pd.DataFrame(dic, index=['lat', 'long']).T
    df2 = pd.DataFrame(json_files, index=['json']).T
    df['json'] = df2
    coord = df.copy()
    coord.to_csv('data/coord.csv')
    return True
Example #21
    def test_datetimeandserial(self):
        '''Test pandas serialization and datetime parsing'''

        import pandas.io.data as web  # removed in modern pandas; use pandas_datareader
        all_data = {}
        for ticker in ['AAPL', 'GOOG']:
            all_data[ticker] = web.get_data_yahoo(ticker, '1/1/2004',
                                                  '1/1/2006')
        price = pd.DataFrame(
            {tic: data['Adj Close']
             for tic, data in all_data.iteritems()})  # Python 2 (.items() on 3)

        scatter = vincent.Scatter()
        scatter.tabular_data(price, columns=['AAPL', 'GOOG'])
        assert scatter.data[0]['values'][0]['x'] == 10.49
        nt.assert_is_none(scatter.data[0]['values'][0]['y'])

        line = vincent.Line()
        line.tabular_data(price, columns=['AAPL'])
        assert line.data[0]['values'][0]['x'] == 1073030400000
    def timeDataVisualization(self):
        dates_Search = []
        with open(self.fname, 'r') as f:
            for line in f:
                tweet = json.loads(line)
                terms_only = list(self._preprocess(tweet.get('text', 'nil')))
                if 'search' in terms_only:
                    dates_Search.append(tweet['created_at'])
        ones = [1] * len(dates_Search)

        idx = pandas.DatetimeIndex(dates_Search)
        Search = pandas.Series(ones, idx)

        per_minute = Search.resample('1Min').sum().fillna(0)

        time_chart = vincent.Line(per_minute)
        time_chart.axis_titles(x="time", y="Freq")
        time_chart.to_json('time_chart.json')
Example #23
def main3(fn):
    with open(fn + '.json', 'r') as f:
        count_all = Counter()
        search_hashes = ['#Endomondo', '#MexicoNeedsWWATour']
        # one date list per tracked hashtag
        datess = [[] for _ in search_hashes]

        for line in f:
            tweet = json.loads(line)
            tokens = preprocess(tweet['text'])

            terms = terms_hash(tokens)
            count_all.update(terms)

            for i in range(len(search_hashes)):
                if search_hashes[i] in terms:
                    datess[i].append(getTime(tweet))

    print(count_all.most_common(10))

    per_minutes = []
    for dates in datess:
        per_minute = time_series(dates)
        per_minutes.append(per_minute)

    keys = search_hashes
    values = per_minutes

    # all the data together
    match_data = dict(zip(keys, values))
    # we need a DataFrame, to accommodate multiple series
    all_matches = pandas.DataFrame(data=match_data, index=values[0].index)
    # Resampling as above
    all_matches = all_matches.resample('1Min').sum().fillna(0)
    # all_matches = all_matches.resample('1D').sum().fillna(0)

    # and now the plotting
    time_chart = vincent.Line(all_matches[keys])
    time_chart.axis_titles(x='Time', y='Freq')
    time_chart.legend(title='Matches')
    time_chart.to_json(fn + '.time_chart.json')
Example #24
def graphLineIdioma(lang, estacion):
    models.dbToCsv(estacion, lang)
    tweets = pd.read_csv('static/tweets.csv')
    # keep only the tweets written in the requested language
    tweets = tweets[tweets['lang'] == lang]
    tweets['created_at'] = pd.to_datetime(tweets['created_at'])

    tweets.set_index('created_at', drop=False, inplace=True)

    tweets_pm = tweets['created_at'].resample('M').count()

    # vincent.core.initialize_notebook()
    line = vincent.Line(tweets_pm)
    line.axis_titles(x='Meses', y='Nº Tweets')
    line.colors(brew='Spectral')
    line.to_json('static/area.json')
    return 0
Example #25
def graph(id):
    project = Project.query.get_or_404(id)
    logs = project.logs
    log_number = logs.count()  # dynamic relationship: count via the query
    if project.logs and log_number > 1:
        logs = project.logs.order_by(ProjectLog.log_date)

        x = [
            datetime.combine(log.log_date, datetime.min.time()).timestamp() *
            1000 for log in logs
        ]
        y = [log.previous_score * score_multiplier for log in logs]

        multi_iter = {'x': x, 'data': y}
        line = vincent.Line(multi_iter, iter_idx='x')

        line.scales['x'] = vincent.Scale(name='x',
                                         type='time',
                                         range='width',
                                         domain=vincent.DataRef(
                                             data='table', field="data.idx"))
        line.scales['y'] = vincent.Scale(name='y',
                                         range='height',
                                         nice=True,
                                         domain=[0, score_multiplier])
        line.scales['color'] = vincent.Scale(name='color',
                                             range=['#12897D'],
                                             type='ordinal')
        line.axes['y'].ticks = 3
        line.axes['x'].ticks = 7

        if line_style:
            line.marks['group'].marks[
                0].properties.enter.interpolate = vincent.ValueRef(
                    value=line_style)

        return jsonify({"status": "success", "data": line.grammar()})
    else:
        return failure_response("No history for this project", 404)
Example #26
def test2():

    # pandas.io.data was removed from pandas; use pandas_datareader instead
    from pandas_datareader import data

    all_data = {}
    for ticker in ['AAPL', 'IBM', 'YHOO', 'MSFT']:
        all_data[ticker] = data.get_data_yahoo(ticker, '1/1/2010', '1/1/2012')

    price = pd.DataFrame(
        {tic: data['Adj Close']
         for tic, data in all_data.items()})
    import vincent
    line = vincent.Line(price)
    line.axis_titles(x='Date', y='Price')
    line.legend(title='IBM vs AAPL')
    js = line.to_json(
        'out.json',
        html_out=True,
    )
def plotdataPopVega(data, vals):
    '''
    Function to create a data popup, as a time series,
    which can then be added to a marker.
    '''
    df = data[vals]

    df.fillna(value='null', inplace=True)  # vincent does not handle missing values
    line = vincent.Line(df)
    line.axis_titles(x="Time", y="Mass Concentration")
    line.legend(title="Values")

    # size the chart to the length of the data
    width = len(df.index)
    if width < 500:
        width = 400

    line.width = width
    line.height = 200
    vega = folium.Vega(json.loads(line.to_json()), width="30%", height="10%")
    popup = folium.Popup(max_width=line.width + 75).add_child(vega)
    return popup
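Hypothetical usage: build the popup from a DataFrame of readings and attach it to a marker; the column name, values, and coordinates are assumptions:

import folium
import numpy as np
import pandas as pd

idx = pd.date_range('2021-06-01', periods=48, freq='H')
readings = pd.DataFrame({'pm25': np.random.rand(48) * 35}, index=idx)

m = folium.Map(location=[59.33, 18.07], zoom_start=10)
popup = plotdataPopVega(readings, ['pm25'])
folium.Marker([59.33, 18.07], popup=popup).add_to(m)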
Example #28
def main2(fn):
    with open(fn + '.json', 'r') as f:
        data_samples = []
        doc_lengths = []
        dates = []

        for line in f:
            tweet = json.loads(line)
            sample, length = preprocess_text(getText(tweet))
            data_samples.append(sample)
            doc_lengths.append(length)
            dates.append(getTime(tweet))

    n_features = 1000
    n_topics = 10
    n_top_words = 20

    #lda_topic(data_samples, n_features, n_topics, n_top_words)
    data_viz, doc_topic_dists = lda_viz(data_samples, doc_lengths, n_features,
                                        n_topics, n_top_words)
    #print(type(data_viz))

    counts = []
    for i in range(len(dates)):
        # count = sum(1 for topic_prob in doc_topic_dists[i] if topic_prob >= 0.5)
        count = 1 if doc_topic_dists[i][9] >= 0.5 else 0
        counts.append(count)

    per_minute = time_series(dates, '1D', counts)
    per_minute.to_csv(fn + '.topic.csv', sep='\t', encoding='utf-8')

    # and now the plotting
    time_chart = vincent.Line(per_minute)
    time_chart.axis_titles(x='Time', y='Freq')
    time_chart.to_json(fn + '.topic.time_chart.json')
Example #29
    # f is the file pointer to the JSON data set; count_all and dates_trump
    # are assumed to be initialized earlier (a Counter and an empty list)
    for line in f:  # for every tweet
        if line.strip():
            tweet = json.loads(line)
            # let's focus on hashtags only at the moment
            terms_hash = [term for term in preprocess(tweet['text'].translate(non_bmp_map))
                          if term.startswith('#')]
            count_all.update(terms_hash)

            # track when the hashtag is mentioned
            if '#Trump' in terms_hash:
                dates_trump.append(tweet['created_at'])

    # a list of "1" to count the hashtags
    ones = [1] * len(dates_trump)
    # the index of the series
    idx = pandas.DatetimeIndex(dates_trump)
    # the actual series (a series of 1s for the moment)
    trump = pandas.Series(ones, index=idx)

    # Resampling / bucketing
    per_minute = trump.resample('1Min').sum().fillna(0)

    time_chart = vincent.Line(per_minute)
    time_chart.axis_titles(x='Time', y='Freq')
    time_chart.to_json('time_chart.json', html_out=True, html_path='chart.html')
Example #30
    # Print the co-occurrences for search_word
    print("Co-occurrences for %s" % search_word)
    print(count_search.most_common(10))

    com_max = []
    # For each term, look for the most common co-occurring terms
    for t1 in com:
        t1_max_terms = sorted(com[t1].items(), key=operator.itemgetter(1))
        for t2, t2_count in t1_max_terms:
            com_max.append(((t1, t2), t2_count))
    # Get the most frequent co-occurrences
    term_max = sorted(com_max, key=operator.itemgetter(1), reverse=True)
    # print(term_max[:5])

    # Print the 10 most frequent words
    # print(count_all.most_common(10))

    # Visual representation without a time reference
    # word_freq = count_all.most_common(20)
    # labels, freq = zip(*word_freq)
    # data = {'data': freq, 'x': labels}
    # bar = vincent.Bar(data, iter_idx='x')
    # bar.to_json('term_freq.json')

    # Visual representation with a time reference; IDE is assumed to be a
    # resampled count series built earlier in the script
    time_chart = vincent.Line(IDE)
    time_chart.axis_titles(x='Time', y='Freq')
    time_chart.to_json('time_chart.json')