def test_analyze_bounds(self):
    """ Testing the bounds of the tweet values """
    ana = Analyzer()
    assert ana.analyze("this is a test neutral tweet") <= 1.0
    assert ana.analyze("this is a test neutral tweet") >= 0.0
def testall(directory, pred_file=None, label_file=None, out_path=None):
    folders = os.listdir(directory)
    networks = []
    for folder in folders:
        if (os.path.isfile(directory + folder + "/network.cfg")
                and os.path.exists(directory + folder + "/results")):
            networks.append(folder)
    config_file = directory + networks[0] + "/network.cfg"
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    test_data = LoadData(directory=config.get('Testing Data', 'folders').split(','),
                         data_file_name=config.get('Testing Data', 'data_file'),
                         label_file_name=config.get('Testing Data', 'label_file'),
                         seg_file_name=config.get('Testing Data', 'seg_file'))
    res = Analyzer(raw=test_data.get_data()[0], target=test_data.get_labels()[0])
    for net in networks:
        config_file = directory + net + "/network.cfg"
        config = ConfigParser.ConfigParser()
        config.read(config_file)
        res.add_results(results_folder=config.get('General', 'directory'),
                        name=net,
                        prediction_file=config.get('Testing', 'prediction_file') + '_0',
                        learning_curve_file='learning_curve')
    res.analyze(-1, pred_file=pred_file, label_file=label_file, out_path=out_path)
    return res
def test_analyze_judgement_weight(self):
    """ Testing the value order of arbitrary tweets """
    ana = Analyzer()
    assert ana.analyze("i am so happy, great day :D") > ana.analyze("i am so happy :D")
    assert ana.analyze("so sad, feeling depressed :'(") < ana.analyze("so depressed :'(")
def test_analyze_empty(self):
    """ Testing empty tweets and tweets including words not in the dictionary """
    ana = Analyzer()
    assert ana.analyze("") == 0.5
    assert ana.analyze("hzoehfsdl") == 0.5
def test_analyze_judgement(self):
    """ Testing the proper judgement of the sentiment analysis:
        * positive and negative
        * best and worst tweet values
    """
    ana = Analyzer()
    assert ana.analyze(":)") > 0.5 and ana.analyze(":'(") < 0.5
    assert ana.analyze("yahoo yahoo yahoo") == 1.0
    assert ana.analyze("zzz zzz zzz zzz zzz") == 0.0
def test_categories_weight(self):
    """ Testing the weights of the different category sums (positive, negative, neutral) """
    ana = Analyzer()
    ctg_total = {'positive': 0.0, 'negative': 0.0, 'neutral': 0.0}
    ctg_count = {'positive': 4, 'negative': 2, 'neutral': 1}
    data = [2, 3, 0, 2, 2, 0, -4, 0, 0, -2, 2]
    tot_pos, tot_neg, tot_neu = ana.weight_categories(data, ctg_total, ctg_count)
    assert (tot_pos, tot_neg, tot_neu) == (99.47646509317096, -49.392885301738836, 3.9750077625545726)
def test_categories_cardinality(self):
    """ Testing the cardinality of the different category sums (positive, negative, neutral) """
    ana = Analyzer()
    ctg_count = {'positive': 0, 'negative': 0, 'neutral': 0}
    text = 'great day today lol ;) but still have to work'
    assert ana.categories_cardinality(text, ctg_count) == 15
    assert ctg_count['positive'] == 4  # great day lol ;)
    assert ctg_count['neutral'] == 1   # today
    assert ctg_count['negative'] == 2  # work still
def testprediction(config_file, pred_file=None, label_file=None, out_path=None):
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    test_data = LoadData(directory=config.get('Testing Data', 'folders').split(','),
                         data_file_name=config.get('Testing Data', 'data_file'),
                         label_file_name=config.get('Testing Data', 'label_file'),
                         seg_file_name=config.get('Testing Data', 'seg_file'))
    res = Analyzer(raw=test_data.get_data()[0], target=test_data.get_labels()[0])
    res.add_results(results_folder=config.get('General', 'directory'),
                    name=config_file.split('/')[-3],
                    prediction_file=config.get('Testing', 'prediction_file') + '_0',
                    learning_curve_file='learning_curve')
    res.analyze(-1, pred_file=pred_file, label_file=label_file, out_path=out_path)
    return res
def analyze(chart_ids: List[str] = [],
            src: str = CHART_PATH,
            dest: str = default_excel_path):
    """
    Analyzes charts given a list of IDs.
    If you want to analyze all levels in src, don't input any IDs.
    """
    if len(chart_ids) == 0:
        with os.scandir(src) as dir_items:
            chart_ids = [cid.name for cid in dir_items if is_chart_folder(cid.path)]
        if len(chart_ids) == 0:
            click.echo("No charts in the folder!")

    stat_list = dict()
    src = os.path.abspath(src)
    dest = os.path.abspath(dest)
    os.makedirs(os.path.dirname(dest), exist_ok=True)

    with click.progressbar(chart_ids,
                           label=f"Analyzing {len(chart_ids)} charts...",
                           item_show_func=lambda x: x) as prog_bar:
        for chart_id in prog_bar:
            analyzer = Analyzer(src, chart_id)
            analyzer.start()
            stats = analyzer.get_stats_as_json()
            stat_list[chart_id] = stats

    click.echo(f"Done analyzing, now saving to {dest}...")
    dest_folder = os.path.dirname(dest)
    os.makedirs(dest_folder, exist_ok=True)

    stat_df = pd.DataFrame.from_dict(stat_list, orient="index")
    stat_df.index.name = "chart_id"
    excel_writer = ExcelWriter(stat_df, dest)
    excel_writer.format_table()
    excel_writer.close()
    click.echo("Stats successfully saved.")
def home():
    if request.method == 'GET':
        # render homepage template
        return render_template('boot.html')
    else:
        # grab POST form data
        data = request.form
        # parse as JSON
        jsondata = json.dumps(data, separators=(',', ':'))
        if 'topic' in jsondata:
            # load data into dictionary
            new_data = json.loads(jsondata)
            # create random number for this graph
            new_data['rand'] = str(int(random.random() * 999999999))
            # connect to twitter
            auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
            auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
            api = tweepy.API(auth)
            # get the tweets
            tweets = stream.gather_tweets(api, auth,
                                          keyword=new_data['topic'][0],
                                          limit=int(new_data['limit'][0]))
            # Create analyzer
            analyzer = Analyzer(tweets, new_data['topic'][0])
            analyzer.save_sentiment_data(int(new_data['rand']))
            # render results page
            return redirect(url_for('log', data=json.dumps(new_data), mode='debug'))
        elif 'username' in jsondata:
            # load data into dictionary
            new_data = json.loads(jsondata)
            # create random number for this graph
            new_data['rand'] = str(int(random.random() * 999999999))
            # connect to twitter
            auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
            auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
            api = tweepy.API(auth)
            # get the tweets
            tweets = stream.gather_tweets(api, auth,
                                          username=new_data['username'][0],
                                          limit=50)
            # Create analyzer
            analyzer = Analyzer(tweets, new_data['username'][0])
            analyzer.save_sentiment_data(int(new_data['rand']))
            return redirect(url_for('log', data=json.dumps(new_data), mode='debug'))
def ViewResults(**kwargs):
    directory = kwargs.get("directory", "")
    network = kwargs.get("network", None)
    prediction_file = kwargs.get("predictions_file", None)
    if network:
        # Assume that all networks are tested on the same set of data
        config = ConfigParser.ConfigParser()
        config.read("networks/" + network + "/network.cfg")
        data = LoadData(
            directory=config.get("Testing Data", "folders").split(",")[0],
            data_file_name=config.get("Testing Data", "data_file"),
            label_file_name=config.get("Testing Data", "label_file"),
        )
        if not prediction_file:
            prediction_file = "test_prediction_0"
        results = Analyzer(target=data.get_labels()[0], raw=data.get_data()[0])
        results.add_results(results_folder="networks/" + network + "/",
                            name=network,
                            prediction_file=prediction_file)
    else:
        folders = os.listdir(directory)
        networks = []
        for folder in folders:
            if os.path.isfile(directory + folder + "/network.cfg"):
                networks.append(folder)
        # Assume that all networks are tested on the same set of data
        config = ConfigParser.ConfigParser()
        config.read(directory + networks[0] + "/network.cfg")
        data = LoadData(
            directory=config.get("Testing Data", "folders").split(",")[0],
            data_file_name=config.get("Testing Data", "data_file"),
            label_file_name=config.get("Testing Data", "label_file"),
        )
        if not prediction_file:
            prediction_file = "test_prediction_0"
        results = Analyzer(target=data.get_labels()[0], raw=data.get_data()[0])
        for net in networks:
            results.add_results(results_folder=directory + net + "/",
                                name=net,
                                prediction_file=prediction_file)
    return results
# t = TweetLoader('realDonaldTrump')
h = TweetLoader('', path='data/backup/', filename='hillary_2016-07-13.json')
t = TweetLoader('', path='data/backup/', filename='trump_2016-07-13.json')
h.load()
t.load()

# Join them together
full_tweets = pd.concat([h.tweets, t.tweets])

# Assign label (second array) for Hillary(0)/Trump(1) tweets
label_array = np.array([0] * len(h.tweets) + [1] * len(t.tweets))

# Run through part of the model to get the PCA results and loading factors
# This is not the full model, just a part of it for illustration purposes
max_words = 50
mod = Analyzer(full_tweets['text'], labels=label_array, max_words=max_words, load_pca=False)
# mod.load_words()
mod.get_words()
mod.create_dtm()
mod.run_pca()

loadings = mod.loadings
loadings.index = ['PC' + str(j + 1) for j in range(len(loadings))]
# loadings = loadings.iloc[0:30, :]  # Use only a subset of the data
loadings = loadings.transpose()  # Use rotation

words = loadings.columns.tolist()
pc_names = loadings.index.tolist()
    print("Unknown model " + args.model + ".\n")
    exit()

# Send model weights to the device
model.to(args.device)
print(model)

#%%
"""
###################
Initialize model and analyzer save
###################
"""
# Apply weight initialization
model.apply(initializer)
# Create an analyzer object
analyzer = Analyzer(args)

#%%
"""
###################
Create optimizer
###################
"""
# Optimizer and scheduler
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, threshold=1e-6)
# Reconstruction criterion (L1 loss) for autoencoder-type models
if args.model in ['ae', 'vae', 'wae', 'vae_flow']:
    criterion = nn.L1Loss()
def main(argv=None):
    # read in params
    if argv is None:
        argv = sys.argv[1:]
    file = 'tulalens_survey_sample.csv'
    facet = 'result id'

    # standard python parsing for command line options
    opts = []
    args = []
    try:
        opts, args = getopt.getopt(argv, "hl", ["help", "list", "file=", "facet="])
    except getopt.GetoptError as msg:
        print(msg, file=sys.stderr)
        print("For help use --help", file=sys.stderr)
        return 2
    if len(args):
        print("Invalid arg(s) %s" % args, file=sys.stderr)
        usage()
        return 2
    for (opt, val) in opts:
        if opt in ("-h", "--help"):
            usage()
            return 0
        if opt in ("-l", "--list"):
            list()
            return 0
        elif opt in ("--file",):
            file = val
        elif opt in ("--facet",):
            facet = val.lower()
        else:
            usage()
            return 2
    print("facet: %s" % facet)

    # check if facet given is in the list of survey questions
    # ideally this allows for quick entries with just the
    # question number, e.g. "--facet Q30"
    long_q = ''  # keep track of the long form for later use
    valid_facet = False
    for long, short in SHORT_QUESTIONS.items():
        # print("checking question: %s" % question)
        if facet in long:
            # turn the facet into easy to use question ids
            # p = "(^q\d\d?[.]).*"
            # m = re.match(p, long)
            facet = short
            long_q = long
            print("Question selected: %s" % long_q)
            valid_facet = True
            break
    if not valid_facet:
        sys.exit("facet selected is not a survey question")

    # parse csv file
    parser = CsvParse(file)
    answers = parser.parse()

    # generate analysis based on options
    # print("number of answer rows after parse: %s" % len(answers))
    analyze = Analyzer(answers)
    # find the unique occurrence of each answer to the question
    answers_count = analyze.group_by(facet)
    mean = analyze.find_mean(facet, answers_count)
    sys.exit()
from analysis import Analyzer
from helper_functions import print_dtm, top_factors, make_biplot
import pandas as pd
import matplotlib.pyplot as plt

# Load tweets
s2 = TweetLoader(filename='coolstars.json', track_location=False, path='coolstars19/data/')
s2.load()
df = s2.tweets.copy()
df.index = pd.DatetimeIndex(df['created_at'])

# Using the Analyzer class
max_words = 100
mod = Analyzer(df['text'], None, max_words=max_words, load_pca=False, load_svm=False,
               more_stop_words=['rt', 'cs19', 'cs19_uppsala'])
mod.get_words()
mod.create_dtm()
mod.run_pca()

# Exploration
print_dtm(mod.dtm, df['text'], 42)

# Top terms in components
top_factors(mod.load_squared, 0)

# Plots
make_biplot(mod.pcscores, None, mod.loadings, 0, 1)
    404 error handler used if a non-existent route is requested
    """
    return render_template('404.html'), 404


@app.errorhandler(500)
def page_not_found(exc):
    """
    500 error handler used if there is a server error
    """
    return render_template('500.html'), 500


if __name__ == '__main__':
    analyzer = Analyzer()
    server = SocketIOServer(('', PORT), app, resource="socket.io")
    tw_thread = TweetWeather(server, analyzer, name="Tweet-Weather-Thread")
    tw_thread.daemon = True
    gevent.spawn(tw_thread.new_post, server)
    gevent.spawn(tw_thread.connexion_lost, server)
    print("Application Started: http://localhost:5000")
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        tw_thread.stop()
        server.stop()
        sys.exit()
    def __init__(self, market):
        super(MarketThread, self).__init__()
        self.market = market

    def run(self):
        while not self._stop.isSet():
            time.sleep(settings.HEARTBEAT)
            self.market.update()


if __name__ == "__main__":
    q = Queue.Queue()
    p = Portfolio(20000)
    e = Executor(p)
    a = Analyzer(portfolio=p)
    m = Market(queue=q)
    trading_thread = TradingThread(queue=q, analyzer=a, events=e)
    market_thread = MarketThread(market=m)

    def receive_signal(signum, stack):
        print("You quit")
        trading_thread.stop()
        market_thread.stop()
        sys.exit(0)

    market_thread.start()
    trading_thread.start()
    signal.signal(signal.SIGINT, receive_signal)
            print(tweet.text.encode('utf-8'))
            tweets.append(tweet._json)
            count += 1
            if count > limit:
                break
        return tweets
    elif keyword:
        l = StdOutListener()
        stream = Stream(auth, l)
        global lim
        lim = limit
        stream.filter(track=[keyword])
        with open('tweet_stream.pickle', 'rb') as f:
            return pickle.load(f)
    else:
        raise ValueError('Invalid Arguments. username and keyword both '
                         'can\'t be None')


if __name__ == '__main__':
    s = 'baltimore'
    auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
    api = tweepy.API(auth)

    # tweets = gather_tweets(username=s)  # last 30 tweets
    tweets = gather_tweets(keyword=s, limit=30)

    # Create analyzer
    analyzer = Analyzer(tweets, s)
    avg = analyzer.calc_sentiment()
    # keywrds = analyzer.get_keywords()
    analyzer.save_sentiment_data()
# timer
STOP = time.time()
print(f"\t-----> Done.")
print(f"\t-----> Execution time: {round(STOP - START, 2)} sec")


if __name__ == "__main__":
    app_settings = {
        'client_id': os.getenv('SPOTIFY_CLIENT_ID'),
        'client_secret': os.getenv('SPOTIFY_CLIENT_SECRET'),
        'redirect_uri': os.getenv('SPOTIFY_REDIRECT_URI')
    }

    # init analyzer
    az = Analyzer(**app_settings)

    # get tracks and simulate lengths
    # get all playlists
    playlists = az.user_playlists(is_author=True)

    start = time.time()
    print("-----> Gathering all tracks...", end="")

    # get all tracks
    all_tracks = []
    for playlist in playlists:
        tracks = az.playlist_tracks(playlist['id'])
        # append the playlist meta data
        # to the track objects
        for i in range(len(tracks)):
            tracks[i]['playlist'] = playlist
# h = TweetLoader('HillaryClinton')
# t = TweetLoader('realDonaldTrump')
h = TweetLoader('', path='data/backup/', filename='hillary_2016-07-13.json')
t = TweetLoader('', path='data/backup/', filename='trump_2016-07-13.json')
h.load()
t.load()

# Assign label (second array) for Hillary(0)/Trump(1) tweets
label_array = np.array([0] * len(h.tweets) + [1] * len(t.tweets))

# Merge tweets together, pass to Analyzer
df_tweets = pd.concat([h.tweets['text'], t.tweets['text']], axis=0, join='outer',
                      join_axes=None, ignore_index=True, keys=None, levels=None,
                      names=None, verify_integrity=False)

# Using the Analyzer class
mod = Analyzer(df_tweets, label_array, max_words=max_words,
               load_pca=False, load_svm=False, use_sentiment=True)
# mod.get_words()
# mod.create_dtm()
# mod.run_pca()
# mod.get_sentiment()
# test_predict, test_label = mod.run_svm()

# One-line alternative with defaults
test_predict, test_label = mod.create_full_model()

# Check a PCA plot
# mod.make_biplot(2, 3, max_arrow=0.2)

# Check results
cm = mod.make_confusion_matrix(test_label, test_predict, normalize=False, axis=0,
                               label_names=['Clinton', 'Trump'])
"pem_name", help= "Name of the PEM file that is needed to connect to the data collection servers." ) parser.add_argument( "database_ip", help="IP of the Postgres database that the results will be put into.") parser.add_argument("data_collector_ips", nargs='+', help="List of IPs of the data collection servers.") args = parser.parse_args() ec = External_Connector(args.pem_name, args.database_ip) # Create list of local files, first is twitter data, rest is news data files = [ "%s%d.txt" % (args.type, i) for i in range(0, len(args.data_collector_ips)) ] ec.get_data_files(args.data_collector_ips, files) a = Analyzer() # Run three analyses for each data file and upload them to database for f in files: sentiment, mood, emoticon = a.run(args.type, f) ec.insert_sentiment(args.run_id, args.type, sentiment) ec.insert_mood(args.run_id, args.type, mood) ec.insert_emoticon(args.run_id, args.type, emoticon)
from bokeh.sampledata.us_states import data as states
from bokeh.models import ColumnDataSource, HoverTool
import reverse_geocoder as rg
import pandas as pd
import numpy as np

# Some global defaults
max_words = 200

# Load most recent tweets from Hillary Clinton and Donald Trump
# s = TweetLoader(filename='search.json', track_location=True)
s = TweetLoader(filename='search_2016-07-13.json', track_location=True, path='data/backup/')
s.load()

# Calculate and grab model results
mod = Analyzer(s.tweets['text'], max_words=max_words, load_pca=True, load_svm=True)
predict = mod.load_full_model()  # Hillary=0 Trump=1
s.tweets['predict'] = predict

# Clean up missing coordinates
df = s.tweets['geo.coordinates']
bad = df.apply(lambda x: x is None)
df = df[~bad]
s.tweets = s.tweets[~bad]
lat = df.apply(lambda x: x[0])
lon = df.apply(lambda x: x[1])
# lat, lon = zip(*df)  # Alternate

# Remove Alaska and Hawaii
del states["HI"]
__author__ = 'Ahmed Hani Ibrahim'
from reader import DataReader
from analysis import Analyzer

file_path = './data/data_science_dataset_wuzzuf.csv'
reader = DataReader(file_path)
data = reader.read_data()

analyzer = Analyzer(data)
analyzer.trending_category()

x = 0
import matplotlib.pyplot as plt
import seaborn as sns

sys.path.insert(1, "../tools")
from analysis import Analyzer
from plotting import Plotter
from training import Trainer

if __name__ == "__main__":
    sns.set()

    plot_dir = "plots"
    plot_file = os.path.join(plot_dir, "rdf.png")
    if not os.path.exists(plot_dir):
        os.mkdir(plot_dir)

    anl = Analyzer()
    plter = Plotter()

    r_cut = 6.0
    r, rdf = anl.calculate_rdf("trajs/training.traj", r_max=r_cut)
    rdf[np.nonzero(rdf)] /= max(rdf)
    cutoff = plter.polynomial(r, r_cut, gamma=5.0)

    plt.plot(r, rdf, label="Radial distribution function")
    plt.plot(r, cutoff, label="Polynomial cutoff, gamma=5.0")
    plt.legend()
    plt.title("Copper radial distribution function")
    plt.xlabel("Radial distance [Angstrom]")
    plt.ylabel("Radial distribution function (normalized to 1)")
    plt.savefig(plot_file)
trn.create_Gs(elements, num_radial_etas, num_angular_etas, num_zetas, angular_type)
symm_funcs["Selected"] = trn.Gs

G2 = make_symmetry_functions(elements=elements, type="G2",
                             etas=[0.05, 0.23, 1.0, 5.0],
                             centers=np.zeros(4))
G4 = make_symmetry_functions(
    elements=elements,
    type="G4",
    etas=0.005 * np.ones(1),
    zetas=[1.0, 4.0],
    gammas=[1.0, -1.0],
)
symm_funcs["Default"] = G2 + G4

anl = Analyzer()
plter = Plotter()
r, rdf = anl.calculate_rdf(train_traj, r_max=cutoff.Rc)

for label, symm_func in symm_funcs.items():
    plter.plot_symmetry_functions(
        label + "_rad.png",
        label + "_ang.png",
        symm_func,
        rij=r,
        rdf=rdf,
        cutoff=cutoff,
    )
def analyze(self, expr):
    name = "f"
    self.analyzer = Analyzer(expr)

    self.function_view.set_from_expression(expr, name=name + "(x)")
    self.function_view.set_font_size(40)

    box = ListBox("Dominio")
    self.box.pack_start(box, False, False, 0)
    domain_block = EqualBlock(TextBlock("D(f)"),
                              TextBlock(interval_to_string(self.analyzer.domain)))
    box.make_row_with_child(domain_block)

    box = ListBox("Raíces")
    self.box.pack_start(box, False, False, 0)
    roots_block = TextBlock(set_to_string(self.analyzer.roots.keys()))
    box.make_row_with_child(roots_block)

    box = ListBox("Signo")
    self.box.pack_start(box, False, False, 0)
    if self.analyzer.positive.__class__ != sympy.EmptySet:
        positive_block = TextBlock("+ " + interval_to_string(self.analyzer.positive))
        box.make_row_with_child(positive_block)
    if self.analyzer.negative.__class__ != sympy.EmptySet:
        negative_block = TextBlock("- " + interval_to_string(self.analyzer.negative))
        box.make_row_with_child(negative_block)

    box = ListBox("Continuidad")
    self.box.pack_start(box, False, False, 0)
    if self.analyzer.continuity == self.analyzer.domain:
        block = TextBlock("f es continua en todo su dominio.")
    else:
        block = TextBlock("f es continua para los x %s %s\n" %
                          (Chars.BELONGS, interval_to_string(self.analyzer.continuity)))
    box.make_row_with_child(block)

    box = ListBox("Ramas")
    self.box.pack_start(box, False, False, 0)
    if self.analyzer.branches[sympy.oo] is not None:
        block = TextBlock("f posee %s cuando" % Branch.get_name(*self.analyzer.branches[sympy.oo]))
        row = box.make_row_with_child(block)
        trend_block = TrendBlock(TextBlock("x"), TextBlock("+" + Chars.INFINITY))
        trend_block.set_margin_left(10)
        row.add_child(trend_block)
    if self.analyzer.branches[-sympy.oo] is not None:
        block = TextBlock("f posee %s cuando" % Branch.get_name(*self.analyzer.branches[-sympy.oo]))
        row = box.make_row_with_child(block)
        trend_block = TrendBlock(TextBlock("x"), TextBlock("-" + Chars.INFINITY))
        trend_block.set_margin_left(10)
        row.add_child(trend_block)

    box = ListBox("Crecimiento")
    self.box.pack_start(box, False, False, 0)
    block = MathView.new_from_expression(self.analyzer.derived, name + "'(x)")
    box.make_row_with_child(block)
    if self.analyzer.derived_things.negative.__class__ != sympy.EmptySet:
        block = TextBlock(name + " decrece en ")
        row = box.make_row_with_child(block)
        row.add_child(make_interval_points(self.analyzer.derived_things.negative))
    if self.analyzer.derived_things.positive.__class__ != sympy.EmptySet:
        block = TextBlock(name + " crece en ")
        row = box.make_row_with_child(block)
        row.add_child(make_interval_points(self.analyzer.derived_things.positive))

    mins, maxs = self.analyzer.get_minimums_and_maximums()
    if mins:
        block = TextBlock("Mínimos: ")
        row = box.make_row_with_child(block)
        for point in mins:
            _x = MathView.new_from_expression(point[0])
            _y = MathView.new_from_expression(point[1])
            block = PointBlock(_x, _y)
            row.add_child(block)
    if maxs:
        block = TextBlock("Máximos: ")
        row = box.make_row_with_child(block)
        for point in maxs:
            _x = MathView.new_from_expression(point[0])
            _y = MathView.new_from_expression(point[1])
            block = PointBlock(_x, _y)
            row.add_child(block)

    box = ListBox("Concavidad")
    self.box.pack_start(box, False, False, 0)
    block = MathView.new_from_expression(self.analyzer.derived2, name + "''(x)")
    box.make_row_with_child(block)
    if self.analyzer.derived2_things.positive.__class__ != sympy.EmptySet:
        block = TextBlock("f tiene concavidad positiva en: ")
        row = box.make_row_with_child(block)
        row.add_child(make_interval_points(self.analyzer.derived2_things.positive))
    if self.analyzer.derived2_things.negative.__class__ != sympy.EmptySet:
        block = TextBlock("f tiene concavidad negativa en: ")
        row = box.make_row_with_child(block)
        row.add_child(make_interval_points(self.analyzer.derived2_things.negative))

    _analyzer = Analyzer(self.analyzer.derived)
    mins, maxs = _analyzer.get_minimums_and_maximums()
    inflection_points = mins + maxs
    if inflection_points:
        block = TextBlock("Puntos de inflexión: ")
        row = box.make_row_with_child(block)
        for point in inflection_points:
            _x = MathView.new_from_expression(point[0])
            _y = MathView.new_from_expression(point[1])
            block = PointBlock(_x, _y)
            block.set_margin_right(10)
            row.add_child(block)

    self.show_all()
test_traj = "test.traj"
steps, test_traj = trjbd.integrate_atoms(
    test_atoms, test_traj, n_test, save_interval, timestep=timestep, convert=True
)

amp_test_traj = "amp_test.traj"
steps, amp_test_traj = trjbd.integrate_atoms(
    amp_test_atoms,
    amp_test_traj,
    n_test,
    save_interval,
    timestep=timestep,
    convert=True,
)

anl = Analyzer()
r, rdf = anl.calculate_rdf(test_traj, r_max=6.0)
r_amp, rdf_amp = anl.calculate_rdf(amp_test_traj, r_max=6.0)
rdf_plot = system + "_" + "rdf.png"
plter.plot_rdf(rdf_plot, legend, r, rdf, rdf_amp)

steps, energy_exact, energy_amp = anl.calculate_pot_energy_diff(
    test_traj, amp_test_traj, save_interval=save_interval
)
pot_plot = system + "_" + "pot.png"
plter.plot_pot_energy_diff(pot_plot, legend, steps, energy_exact, energy_amp)

steps, energy_exact, energy_amp = anl.calculate_energy_diff(
    test_traj, amp_test_traj, save_interval=save_interval
)
energy_plot = system + "_" + "energy.png"
# h = TweetLoader('HillaryClinton')
# t = TweetLoader('realDonaldTrump')
h = TweetLoader('', path='data/backup/', filename='hillary_2016-07-13.json')
t = TweetLoader('', path='data/backup/', filename='trump_2016-07-13.json')
h.load()
t.load()

# Assign label (second array) for Hillary(0)/Trump(1) tweets
label_array = np.array([0] * len(h.tweets) + [1] * len(t.tweets))

df_tweets = pd.concat([h.tweets['text'], t.tweets['text']], axis=0, join='outer',
                      join_axes=None, ignore_index=True, keys=None, levels=None,
                      names=None, verify_integrity=False)

# Using the Analyzer class to get sentiments
mod = Analyzer(df_tweets, label_array)
mod.get_sentiment()

# Group together tweets, labels, and sentiments
temp = pd.concat([h.tweets, t.tweets], axis=0, join='outer', join_axes=None,
                 ignore_index=True, levels=None)
df = pd.concat([temp, mod.sentiment, pd.DataFrame({'label': label_array})],
               axis=1, levels=None)


# Get Tweet text and URLs for embedding: https://twitter.com/{user}/status/{id}
def print_and_get_url(tweet):
    print(tweet['text'].values[0])
    print('https://twitter.com/{}/status/{}'.format(tweet['user.screen_name'].values[0],
                                                    tweet['id'].values[0]))


# Most positive and negative tweet
print_and_get_url(df.sort_values(by='positive', ascending=False)[df['label'] == 0])
print_and_get_url(df.sort_values(by='positive', ascending=False)[df['label'] == 1])