def test_simple(self):
    """Check ``blaze.abs`` element by element against known magnitudes.

    The input mixes complex values with ``blaze.inf`` and ``blaze.nan``;
    the reference array holds the expected result for each position.
    """
    inputs = blaze.array([1+1j, 0+2j, 1+2j, blaze.inf, blaze.nan])
    expected = blaze.array([blaze.sqrt(2.), 2, blaze.sqrt(5),
                            blaze.inf, blaze.nan])
    actual = blaze.abs(inputs)

    # Compare position by position, indexing both arrays explicitly.
    count = len(inputs)
    for index in range(count):
        assert_almost_equal(actual[index], expected[index])
def plot(self, output_file="termite.html"):
    """Build the termite plot from ``self.input_file`` and show it.

    The input is loaded through ``blz.Data`` and must expose ``word``,
    ``topic`` and ``weight`` columns. Each (topic, word) pair is drawn as
    a circle whose size is derived from the min-max normalised weight.

    Parameters
    ----------
    output_file : str
        Path handed to ``plt.output_file`` before the figure is shown.
    """
    t = blz.Data(self.input_file)

    max_weight = blz.compute(t.weight.max())
    min_weight = blz.compute(t.weight.min())

    # Create a size variable to define the size of the circle for the plot.
    # NOTE: if every weight is identical, max_weight - min_weight is zero
    # and this scaling is undefined.
    t = blz.transform(
        t,
        size=blz.sqrt((t.weight - min_weight) / (max_weight - min_weight)) * 50,
    )

    words = into(list, t['word'].distinct())
    # Convert topics to strings so they can be used as a categorical range.
    topics = [str(topic) for topic in into(list, t['topic'].distinct())]

    source = into(pd.DataFrame, t)

    plt.output_file(output_file)

    data_source = ColumnDataSource(source)

    p = plt.figure(x_range=topics, y_range=words,
                   plot_width=1000, plot_height=1700,
                   title=self.title)

    p.circle(x="topic", y="word", size="size", fill_alpha=0.6,
             source=data_source)

    # Pass the argument lazily so logging does the formatting itself.
    logging.info("generating termite plot for file %s", self.input_file)
    plt.show(p)
def plot(self, output_file="termite.html"):
    """Render the termite plot for ``self.input_file`` and show it.

    The plotting dependencies are imported inside the function. The data
    loaded from ``self.input_file`` must expose ``word``, ``topic`` and
    ``weight`` columns.

    Parameters
    ----------
    output_file : str
        Path handed to ``plt.output_file`` before the figure is shown.
    """
    import blaze as blz
    from odo import into
    import pandas as pd
    import bokeh.plotting as plt
    from bokeh.models.sources import ColumnDataSource

    table = blz.Data(self.input_file)

    weight_max = blz.compute(table.weight.max())
    weight_min = blz.compute(table.weight.min())

    # Circle size: square root of the min-max normalised weight, times 50.
    normalised = (table.weight - weight_min) / (weight_max - weight_min)
    table = blz.transform(table, size=blz.sqrt(normalised) * 50)

    word_labels = into(list, table['word'].distinct())
    # The topic axis is categorical, so its labels must be strings.
    topic_labels = [str(label) for label in into(list, table['topic'].distinct())]

    frame = into(pd.DataFrame, table)

    plt.output_file(output_file)

    circles = ColumnDataSource(frame)

    figure = plt.figure(
        x_range=topic_labels,
        y_range=word_labels,
        plot_width=1000,
        plot_height=1700,
        title=self.title,
    )
    figure.circle(
        x="topic",
        y="word",
        size="size",
        fill_alpha=0.6,
        source=circles,
    )

    plt.show(figure)
def plot(self):
    """Build the termite plot and return its embeddable components.

    The input is loaded from ``self.input_file`` through ``blz.Data`` and
    must expose ``word``, ``topic`` and ``weight`` columns. Each
    (topic, word) pair is drawn as a circle whose size is derived from the
    min-max normalised weight.

    Returns
    -------
    tuple
        The ``(script, div)`` pair produced by ``components(p, CDN)``.
    """
    t = blz.Data(self.input_file)

    max_weight = blz.compute(t.weight.max())
    min_weight = blz.compute(t.weight.min())

    # Create a size variable to define the size of the circle for the plot.
    # NOTE: if every weight is identical, max_weight - min_weight is zero
    # and this scaling is undefined.
    t = blz.transform(
        t,
        size=blz.sqrt((t.weight - min_weight) / (max_weight - min_weight)) * 50,
    )

    words = into(list, t['word'].distinct())
    # Convert topics to strings so they can be used as a categorical range.
    topics = [str(topic) for topic in into(list, t['topic'].distinct())]

    source = into(pd.DataFrame, t)

    data_source = ColumnDataSource(source)

    p = plt.figure(x_range=topics, y_range=words,
                   plot_width=1000, plot_height=1700, title=None)

    p.circle(x="topic", y="word", size="size", fill_alpha=0.6,
             source=data_source)

    # Pass the argument lazily so logging does the formatting itself.
    logging.info("generating termite plot for file %s", self.input_file)

    script, div = components(p, CDN)
    return script, div
def termite(modeled_corpus, plot_title="Termite plot", topn=15):
    """A Bokeh Termite Visualization for LDA results analysis.

    Parameters
    ----------
    modeled_corpus :
        Model output forwarded unchanged to ``_termite_data``. The prepared
        result must expose "word", "topic" and "weight" columns.
        (Exact accepted type is decided by ``_termite_data`` -- TODO confirm.)
    plot_title : str
        The title for your termite plot
    topn : int
        Forwarded to ``_termite_data``; presumably the number of top words
        kept per topic -- TODO confirm.

    Returns
    -------
    The Bokeh figure holding one circle per (topic, word) pair.

    Examples
    --------
    >>> plot = termite(test_model_output, plot_title="My model results", topn=5)

    """
    table = blz.Data(_termite_data(modeled_corpus, topn))

    weight_max = blz.compute(table.weight.max())
    weight_min = blz.compute(table.weight.min())

    # Circle size: square root of the min-max normalised weight, times 50.
    normalised = (table.weight - weight_min) / (weight_max - weight_min)
    table = blz.transform(table, size=blz.sqrt(normalised) * 50)

    word_labels = into(list, table['word'].distinct())
    # The topic axis is categorical, so its labels must be strings.
    topic_labels = [str(label) for label in into(list, table['topic'].distinct())]

    frame = into(pd.DataFrame, table)
    circles = sources.ColumnDataSource(frame)

    figure = plt.figure(
        x_range=topic_labels,
        y_range=word_labels,
        plot_width=1000,
        plot_height=1700,
        title=plot_title,
    )
    figure.circle(
        x="topic",
        y="word",
        size="size",
        fill_alpha=0.6,
        source=circles,
    )
    return figure
def termite(modeled_corpus, plot_title="Termite plot", topn=15):
    """A Bokeh Termite Visualization for LDA results analysis.

    Parameters
    ----------
    modeled_corpus :
        Passed as-is to ``_termite_data`` together with ``topn``; the data
        that comes back must have "word", "topic" and "weight" columns.
        (Accepted input type depends on ``_termite_data`` -- TODO confirm.)
    plot_title : str
        The title for your termite plot
    topn : int
        Second argument to ``_termite_data``; presumably how many words to
        keep per topic -- TODO confirm.

    Returns
    -------
    The Bokeh figure with the termite circles drawn on it.

    Examples
    --------
    >>> plot = termite(test_model_output, plot_title="My model results", topn=5)

    """
    prepared = _termite_data(modeled_corpus, topn)
    data = blz.Data(prepared)

    hi = blz.compute(data.weight.max())
    lo = blz.compute(data.weight.min())

    # Add a "size" column that drives the radius of each plotted circle.
    data = blz.transform(data, size=blz.sqrt((data.weight - lo) / (hi - lo)) * 50)

    y_labels = into(list, data['word'].distinct())

    # Topics are turned into strings for the categorical x axis.
    raw_topics = into(list, data['topic'].distinct())
    x_labels = []
    for raw in raw_topics:
        x_labels.append(str(raw))

    data_source = sources.ColumnDataSource(into(pd.DataFrame, data))

    figure_options = dict(x_range=x_labels, y_range=y_labels,
                          plot_width=1000, plot_height=1700,
                          title=plot_title)
    chart = plt.figure(**figure_options)

    chart.circle(x="topic", y="word", size="size", fill_alpha=0.6,
                 source=data_source)
    return chart
def distance(lat1, lon1, lat2, lon2, R=3959):
    """Return the great-circle distance between two points (haversine).

    Parameters
    ----------
    lat1, lon1 : latitude and longitude of the first point, in degrees.
    lat2, lon2 : latitude and longitude of the second point, in degrees.
    R : sphere radius; the result is in the same unit. The default 3959
        corresponds to the Earth's mean radius in miles.

    Returns
    -------
    The distance along the surface of the sphere, ``R`` times the central
    angle between the two points.
    """
    # http://andrew.hedges.name/experiments/haversine/
    dlon = radians(lon2 - lon1)
    dlat = radians(lat2 - lat1)
    # The cosine terms need the latitudes in radians as well; feeding them
    # raw degrees gives wrong distances everywhere off the equator.
    a = (sin(dlat / 2.0) ** 2
         + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2.0) ** 2)
    return R * 2 * atan2(sqrt(a), sqrt(1 - a))
def test_cos(self):
    """Check ``blaze.cos`` on reference angles and on their negations.

    The same expected values are used for ``a`` and ``-a``.
    """
    angles = blaze.array([0, math.pi/6, math.pi/3, 0.5*math.pi,
                          math.pi, 1.5*math.pi, 2*math.pi])
    expected = blaze.array([1, 0.5*blaze.sqrt(3), 0.5, 0, -1, 0, 1])
    tolerance = dict(rtol=1e-15, atol=1e-15)

    assert_allclose(blaze.cos(angles), expected, **tolerance)
    # Negated angles must give the same cosines.
    assert_allclose(blaze.cos(-angles), expected, **tolerance)
def test_sin(self):
    """Check ``blaze.sin`` on reference angles and on their negations.

    For ``-a`` the expected values are the negated reference values.
    """
    angles = blaze.array([0, math.pi/6, math.pi/3, 0.5*math.pi,
                          math.pi, 1.5*math.pi, 2*math.pi])
    expected = blaze.array([0, 0.5, 0.5*blaze.sqrt(3), 1, 0, -1, 0])
    tolerance = dict(rtol=1e-15, atol=1e-15)

    assert_allclose(blaze.sin(angles), expected, **tolerance)
    # Negated angles must give the negated sines.
    assert_allclose(blaze.sin(-angles), -expected, **tolerance)
def test_sqrt(self):
    """Check ``blaze.sqrt`` against known square roots."""
    radicands = blaze.array([0., 9., 64., 1e20, 12345])
    expected_roots = blaze.array([0., 3., 8., 1e10, math.sqrt(12345)])

    # The whole arrays are compared in one call.
    assert_almost_equal(blaze.sqrt(radicands), expected_roots)