def confusion_matrix():
    '''
    Fits a tf-idf + ComplementNB classifier on the 15 most common
    varieties and displays its confusion matrix on a held-out split.

    The vectorizer and the model are hard-coded; edit this function
    directly to try different ones.
    '''
    handler = Data_Handler('data/cleaned_data.csv')
    top_df = handler.get_top_num(15)
    stop_words = handler.stop_words

    # Split first so the vectorizer is only ever fitted on training text.
    X_train, X_test, y_train, y_test = train_test_split(
        top_df['description'], top_df['variety'])

    vecto = TfidfVectorizer(stop_words=stop_words)
    train_vectors = vecto.fit_transform(X_train)
    test_vectors = vecto.transform(X_test)

    model = ComplementNB()
    model.fit(train_vectors, y_train)

    # Fixed axis order for the plot (instead of the model's own ordering).
    class_sort = [
        'Pinot Noir',
        'Cabernet Sauvignon',
        'Red Blend',
        'Bordeaux-style Red Blend',
        'Syrah',
        'Merlot',
        'Zinfandel',
        'Sangiovese',
        'Malbec',
        'Nebbiolo',
        'Rosé',
        'Chardonnay',
        'Sauvignon Blanc',
        'Riesling',
        'White Blend',
    ]
    plot_options = {
        'normalize': 'true',
        'xticks_rotation': 'vertical',
        'labels': class_sort,
        'include_values': False,
    }
    plot_confusion_matrix(model, test_vectors, y_test, **plot_options)
    plt.show()
def pickling():
    '''
    Creates and pickles both the vectorizer and model for use in
    prediction.

    The tf-idf vectorizer is fitted on every description of the top 15
    varieties and written to 'pickles/text_vec.pkl'; a ComplementNB
    model fitted on the resulting matrix is written to
    'pickles/model.pkl'.

    Parameters
    ----------
    None

    Returns
    ----------
    None
    '''
    wrangler = Data_Handler('data/cleaned_data.csv')
    stops = wrangler.stop_words
    df = wrangler.get_top_num(15)
    y = df['variety']

    vecto = TfidfVectorizer(stop_words=stops)
    X = vecto.fit_transform(df['description'])
    # Context managers guarantee the pickle files are flushed and closed
    # even if dumping raises.
    with open('pickles/text_vec.pkl', 'wb') as f:
        pickle.dump(vecto, f)

    model = ComplementNB()
    model.fit(X, y)
    with open('pickles/model.pkl', 'wb') as m:
        pickle.dump(model, m)
def elmo_data_prep():
    '''
    Takes in data to be vectorized and goes through a cleaning and
    lemmatization process so it plays nicely with ELMo.

    Steps applied to the 'description' column of the top 15 varieties:
    strip punctuation, lowercase, replace digits with spaces, drop stop
    words, then lemmatize. The result is written to
    'data/elmo_prepped_data.csv'. Start time, the head of the frame and
    the elapsed seconds are printed.
    '''
    start = time.time()
    print(time.asctime(time.localtime(start)))
    wrangler = Data_Handler('data/cleaned_data.csv')
    df = wrangler.get_top_num(15)
    stops = wrangler.stop_words

    # Scrubbing methods
    punctuation = ',.!"#$%&()*+-/:;<=>?@[\\]^_`{|}~'
    # Build the deletion table once instead of a new set per character.
    strip_punctuation = str.maketrans('', '', punctuation)

    descriptions = df['description'].apply(
        lambda x: str(x).translate(strip_punctuation))
    descriptions = descriptions.str.lower()
    # regex=True is required: newer pandas treats the pattern as a
    # literal string by default, which would leave every digit in place.
    descriptions = descriptions.str.replace(r"[0-9]", " ", regex=True)
    descriptions = descriptions.apply(
        lambda x: ' '.join(
            word for word in x.split() if word not in stops))
    df['description'] = lemmatization(descriptions)

    # Saves data to new .csv
    df.to_csv('data/elmo_prepped_data.csv')
    print(df.head())
    print(time.time() - start)
def perform_embedding(self):
    """Tokenize the train and test sets with a Data_Handler instance.

    Both ``self.Train_X`` and ``self.Test_X`` are replaced by the output
    of ``Data_Handler.clean_tokenize``. Per the original author's note,
    POS-tagging is enforced (presumably inside ``clean_tokenize``).
    """
    tokenize = Data_Handler().clean_tokenize
    self.Train_X = tokenize(self.Train_X)
    self.Test_X = tokenize(self.Test_X)
def perform_embedding(self):
    """Build the embedding weight matrix from a pre-trained GloVe file.

    The GloVe file is chosen by ``self._embedding_dim``. The parsed
    embedding and ``self.tokenizer.word_index`` are handed to
    ``Data_Handler.get_weight_matrix`` and the result is stored in
    ``self.embedding_vectors``.
    """
    handler = Data_Handler()

    # Parse pre-trained embedding.
    glove_path = './../data/glove.6B.%dd.txt' % self._embedding_dim
    pretrained = handler.load_embedding(glove_path)

    # Create matrix with embedding for the words present in the corpus.
    vocabulary = self.tokenizer.word_index
    self.embedding_vectors = handler.get_weight_matrix(pretrained, vocabulary)
def get_team_members():
    """Print a team's players and let the user re-sort them interactively.

    The team comes from ``dh.teams_info()``. The user is asked repeatedly
    whether to reorder the roster; answering 'y' prompts for a statistic
    until ``team.reorder`` accepts it (a truthy return is treated as
    "reordered and printed"), and answering 'n' returns.
    """
    team = dh.teams_info()
    team.print_player_names()
    while True:
        print(
            f"\nThe players are ordered by {team.ordering}. Would you like to"
            f" reorder the players by one of their other statistics? (Y/N): ")
        entry = get_input1()
        if entry == 'n':
            break
        if entry != 'y':
            print("Invalid input. Please try again.")
            continue

        while True:
            # Adjacent literals instead of a backslash continuation inside
            # the string: the continuation leaked the source indentation
            # into the printed prompt.
            print("Players can be reordered by: "
                  "Last name, Jersey, Position, Salary, Experience, "
                  "Age, Weight, Height")
            print("Type in the name of the stat "
                  "you would like to reorder by: ")
            stat = get_input2()
            if team.reorder(stat):
                # reordered and printed successfully
                break
            print("Input is invalid. Try again.")
def word_cloud():
    '''
    Displays a word cloud built from the review descriptions of the
    selected varieties.

    To change the selection, edit the list passed to
    get_certain_varieties below.
    '''
    handler = Data_Handler('data/cleaned_data.csv')
    # The original author left `handler.full` as an alternative source
    # in place of the per-variety selection.
    reviews = handler.get_certain_varieties(['Chardonnay'])
    corpus = ' '.join(reviews['description'])

    cloud_settings = {
        'stopwords': handler.stop_words,
        'background_color': 'white',
        'width': 500,
        'height': 600,
        'colormap': 'viridis',
    }
    cloud = WordCloud(**cloud_settings).generate(corpus)

    plt.figure(figsize=(6, 6), dpi=250)
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()
def test_reorder_standings(self):
    """Reordering the 2020 standings updates the ordering and the rows.

    Checks the first Eastern conference entry after sorting by
    'team name' and the first Western conference entry after sorting by
    'percentage'.
    """
    standings = dh.search_team_standings(2020)

    standings.reorder('team name')
    self.assertEqual(standings.ordering, 'team name')
    first_east_city = standings._eastern_conference[0].city
    self.assertEqual(first_east_city, 'Philadelphia')

    standings.reorder('percentage')
    first_west_city = standings._western_conference[0].city
    self.assertEqual(first_west_city, 'Los Angeles')
def top_x_words(num):
    '''
    Prints, for each of the top 15 varieties, the num vocabulary words
    with the highest ComplementNB feature_log_prob_ value for that class.

    The vocabulary is limited to the 100 tf-idf features kept by the
    vectorizer. Change the get_top_num call below to use a different
    selection of varieties.

    Parameters
    ----------
    num - How many words to print per variety.

    Returns
    ----------
    None
    '''
    wrangler = Data_Handler('data/cleaned_data.csv')
    pn = wrangler.get_top_num(15)
    stops = wrangler.stop_words

    vectorizer = TfidfVectorizer(max_features=100, stop_words=stops)
    X = vectorizer.fit_transform(pn['description'])
    model = ComplementNB()
    model.fit(X, pn['variety'])

    # get_feature_names() was removed in scikit-learn 1.2; prefer the
    # replacement and fall back for older installations.
    get_names = getattr(vectorizer, 'get_feature_names_out', None)
    if get_names is None:
        get_names = vectorizer.get_feature_names
    # Plain str keeps the printed list identical whichever API is used.
    feature_words = [str(word) for word in get_names()]

    for var, name in enumerate(model.classes_):
        print(f"\nTarget: {var}, name: {name}")
        log_prob = model.feature_log_prob_[var]
        # Indices of the num largest values, highest first.
        i_topn = np.argsort(log_prob)[::-1][:num]
        features_topn = [feature_words[i] for i in i_topn]
        print(f"Top {num} tokens: ", features_topn)
def _fit(self, data):
    '''
    Trains the recommender's three stages on the top 15 varieties.

    self.vecto is fitted on the descriptions, self.nb is fitted on the
    vectorized text, and self.rf is fitted on self.nb's predicted class
    probabilities for that same training data.

    Parameters
    ----------
    data - The filepath to the data being fit.

    Returns
    ----------
    None
    '''
    top_df = Data_Handler(data).get_top_num(15)
    descriptions = top_df['description']
    labels = top_df['variety']

    vectors = self.vecto.fit_transform(descriptions)
    self.nb.fit(vectors, labels)

    # The second stage learns from the first stage's probabilities.
    class_probs = self.nb.predict_proba(vectors)
    self.rf.fit(class_probs, labels)
def test_ordering_players_based_on_stats(self):
    """After reordering by a stat, the expected player is first in the roster.

    Each team is fetched once and then reordered by its stats in turn,
    checking the full name of the first player after every reorder.
    """
    expected_leaders = (
        ('HOU', (("Salary", 'Russell Westbrook'),
                 ("Height", 'Tyson Chandler'))),
        ('LAL', (("Weight", 'JaVale McGee'),
                 ("Age", 'LeBron James'))),
    )
    for team_code, checks in expected_leaders:
        team = dh.get_team(team_code)
        for stat, leader_name in checks:
            team.reorder(stat)
            first_player = team._players[0]
            self.assertEqual(first_player.full_name, leader_name)
def test_print_out_player(self):
    """str() of Houston's first player by salary matches the expected text.

    The expected text is a fixed summary paragraph followed by six
    detail cells, each left-justified to a width of 40 characters.
    """
    team = dh.get_team('HOU')
    team.reorder("Salary")
    top_paid_player = team._players[0]

    summary = (
        "Russell Westbrook of the Houston Rockets is 31 years old."
        "\nHis team plays in both the Western conference"
        " and the Southwest division."
        "\nHe is 190.5cm tall and weighs 90.9kg."
        "\nHis salary is $38,506,482 and he has been playing in"
        " the NBA for 12 years.\n"
        "\nFurther player details:\n"
    )
    detail_cells = (
        "\nBirthday: 12/11/1988",
        "Birth City: Long Beach",
        "\nBirth State: California",
        "College: UCLA",
        "\nPosition: PG",
        "Jersey Number: 0",
    )
    expected = summary + ''.join(
        '{:<40}'.format(cell) for cell in detail_cells)

    self.assertEqual(str(top_paid_player), expected)
def graph_top_num(num):
    '''
    Draws a bar chart of review counts for the num most-reviewed
    varieties, taken from the data handler's freq_dict.
    '''
    handler = Data_Handler('data/cleaned_data.csv')
    frequencies = handler.freq_dict
    varietals = list(frequencies.keys())
    counts = list(frequencies.values())

    # argsort is ascending, so reverse it and keep the first num indices.
    top_idx = np.argsort(counts)[::-1][:num]
    top_varietals = []
    top_counts = []
    for idx in top_idx:
        top_varietals.append(varietals[idx])
        top_counts.append(counts[idx])

    fig, ax = plt.subplots(figsize=(15, 15), dpi=100)
    positions = list(range(num))
    ax.bar(positions, top_counts, tick_label=top_varietals)
    ax.set_xlabel('Varieties', fontsize=20)
    ax.set_ylabel('Reviews', fontsize=20)
    ax.set_title(f'Top {num} Reviewed Varieties', fontsize=25)
    plt.xticks(rotation=65, fontsize=15)
    plt.tight_layout()
    plt.show()
def order_standings(season_year):
    """Show a season's standings and let the user re-sort them interactively.

    The standings come from ``dh.search_team_standings(season_year)``.
    The user is asked repeatedly whether to reorder; answering 'y'
    prompts for a column until ``team_standings.reorder`` accepts it,
    after which the standings are shown again, and answering 'n'
    returns.
    """
    team_standings = dh.search_team_standings(season_year)
    team_standings.get_standings()
    while True:
        # Adjacent literals instead of backslash continuations inside the
        # string: the continuations leaked the source indentation into
        # the printed prompt.
        print(
            f"\nThe standings are ordered by {team_standings.ordering}. "
            f"Would you like to reorder the standings by one of the "
            f"other columns? (Y/N): ")
        entry = get_input3()
        if entry == 'n':
            break
        if entry != 'y':
            print("Invalid input. Please try again.")
            continue

        while True:
            print("Type in the name of the column "
                  "you would like to reorder by: ")
            column = get_input4()
            if team_standings.reorder(column):
                team_standings.get_standings()
                break
            print("Input is invalid. Try again.")
pickle.dump(model, m)


if __name__ == '__main__':
    # Due to the nature of the functions here I haven't instantiated
    # argparse. Honestly opening this file up and directly changing the
    # parameter dictionary seems more straightforward and easier than
    # having to pass in 10 values on the command line, especially when
    # juggling multiple hyper parameters. Keeping whatever was found best
    # in the last grid search as the only values.

    # params = {'max_features': [None], 'n-grams': [(1,1)]}
    wrangler = Data_Handler('data/cleaned_data.csv')
    stops = wrangler.stop_words
    df = wrangler.get_top_num(15)
    y = df['variety'].to_numpy()
    # X = df['description'].to_numpy()

    # Load the pre-computed ELMo vectors; the context manager closes the
    # file handle once they are in memory.
    # NOTE(review): pickle.load executes arbitrary code from the file --
    # only load vector files produced by this project.
    with open('data/elmo_vectors.pkl', 'rb') as i:
        elmo = pickle.load(i)

    # df = df + 2.5
    # print(df.shape)
    # print(np.mean(df))
    # print(np.max(df))
    # print(np.min(df))
    model_testing(stops, elmo, y)
    # vectorizer_hyper_test(params, stops, X, y)
def setUp(self):
    """Run the three ``dh`` data imports (team, player, past two seasons)."""
    loaders = (
        dh.import_team_data,
        dh.import_player_data,
        dh.import_past_two_seasons_data,
    )
    for load in loaders:
        load()
from data_handler import Data_Handler as dh # need to import data from API # another comment dh.import_team_data() dh.import_player_data() dh.import_past_two_seasons_data() def get_input1(): return input().lower() def get_input2(): return input().lower() def get_input3(): return input().lower() def get_input4(): return input().lower() def menu(): print("\nMain menu: \n\ 1 - Search for the statistics of an active player in the NBA\n\ 2 - View the team standings for the 2018/2019 season \n\ 3 - View the team standings for the 2019/2020 season \n\ 4 - View the current players of a particular team\n\
class Manager(object):
    """
    Master object for dosimeter operation.

    Initializes other classes, tracks time intervals, and converts the counts
    from Sensor into a CPM to give to the server.

    time_interval is the interval (in seconds) over which CPM is
    calculated.
    """

    # Note: keep the __init__() keywords identical to the keywords in argparse,
    #   in order to avoid unpacking them individually.
    # The None's are handled differently, depending on whether test mode.
    def __init__(self,
                 network_LED_pin=NETWORK_LED_PIN,
                 counts_LED_pin=COUNTS_LED_PIN,
                 signal_pin=SIGNAL_PIN,
                 noise_pin=NOISE_PIN,
                 sender_mode=DEFAULT_SENDER_MODE,
                 interval=None,
                 config=None,
                 publickey=None,
                 hostname=DEFAULT_HOSTNAME,
                 port=None,
                 verbosity=None,
                 log=False,
                 logfile=None,
                 datalog=None,
                 datalogflag=False,
                 protocol=DEFAULT_PROTOCOL,
                 test=None,
                 ):
        """
        Resolve the options, then build the LEDs, Sensor, Data_Handler and
        ServerSender, post the boot log message and reload the backlog.

        signal_pin, noise_pin and hostname are accepted (to mirror the
        argparse keywords) but are not used in this method.
        """

        self.quit_after_interval = False

        self.protocol = protocol

        self.datalog = datalog
        self.datalogflag = datalogflag

        # Order matters: a_flag/d_flag reconcile the path and the flag
        # before the data log file is touched.
        self.a_flag()
        self.d_flag()
        self.make_data_log(self.datalog)

        self.handle_input(log, logfile, verbosity,
                          test, interval, config, publickey)

        self.test = test

        # LEDs
        if RPI:
            self.network_LED = LED(network_LED_pin)
            self.counts_LED = LED(counts_LED_pin)
        else:
            self.network_LED = None
            self.counts_LED = None

        # other objects
        self.sensor = Sensor(
            counts_LED=self.counts_LED,
            verbosity=self.v,
            logfile=self.logfile)
        self.data_handler = Data_Handler(
            manager=self,
            verbosity=self.v,
            logfile=self.logfile,
            network_led=self.network_LED)
        self.sender = ServerSender(
            manager=self,
            mode=sender_mode,
            port=port,
            verbosity=self.v,
            logfile=self.logfile)

        self.init_log()
        # DEFAULT_UDP_PORT and DEFAULT_TCP_PORT are assigned in sender
        self.branch = ''

        self.data_handler.backlog_to_queue()

    def init_log(self):
        """
        Post log message to server regarding Manager startup.

        The message names the git branch and short commit hash found in
        GIT_DIRECTORY. The network LED blinks on a network error and is
        switched on after a successful send.
        """

        # set working directory
        cwd = os.getcwd()
        os.chdir(GIT_DIRECTORY)
        try:
            branch = subprocess.check_output(
                ['git', 'rev-parse', '--abbrev-ref', 'HEAD']).rstrip()
            self.vprint(3, 'Found git branch: {}'.format(branch))
            commit = subprocess.check_output(
                ['git', 'rev-parse', '--short', 'HEAD']).rstrip()
            self.vprint(3, 'Found commit: {}'.format(commit))
        finally:
            # always restore the working directory, even if git fails
            os.chdir(cwd)

        msg_code = BOOT_LOG_CODE
        msg_text = 'Booting on {} at {}'.format(branch, commit)
        self.vprint(1, 'Sending log message: [{}] {}'.format(
            msg_code, msg_text))
        try:
            self.sender.send_log(msg_code, msg_text)
        except (socket.gaierror, socket.error, socket.timeout):
            self.vprint(1, 'Failed to send log message, network error')
            if self.network_LED:
                self.network_LED.start_blink(
                    interval=NETWORK_LED_BLINK_PERIOD_S)
        else:
            self.vprint(2, 'Success sending log message')
            if self.network_LED:
                if self.network_LED.blinker:
                    self.network_LED.stop_blink()
                self.network_LED.on()

    def a_flag(self):
        """
        Checks if the -a from_argparse is called.

        If it is called and no data-log path was given, sets the path of
        the data-log to DEFAULT_DATALOG. An explicitly given path is kept.
        """
        if self.datalogflag and not self.datalog:
            self.datalog = DEFAULT_DATALOG

    def d_flag(self):
        """
        Checks if the -d from_argparse is called.

        If it is called, sets datalogflag to True.
        """
        if self.datalog:
            self.datalogflag = True

    def make_data_log(self, file):
        """Create the data-log file if data logging is enabled."""
        if self.datalogflag:
            # opening in append mode creates the file without truncating it
            with open(file, 'a') as f:
                pass

    def handle_input(self, log, logfile, verbosity,
                     test, interval, config, publickey):
        """
        Resolve logging, verbosity, interval, config and publickey defaults.

        Sets self.logfile, self.v, self.test, self.running, self.interval,
        self.config, self.publickey and self.aes.

        Raises IOError if the config or publickey file cannot be loaded.
        """

        # resolve logging defaults
        if log and logfile is None:
            # use default file if logging is enabled
            logfile = DEFAULT_LOGFILE
        if logfile and not log:
            # enable logging if logfile is specified
            #   (this overrides a log=False input which wouldn't make sense)
            log = True
        if log:
            self.logfile = logfile
        else:
            self.logfile = None

        # set up verbosity
        if verbosity is None:
            if test:
                verbosity = 2
            else:
                verbosity = 1
        self.v = verbosity
        # presumably this is what provides self.vprint -- it is not
        # defined in this class
        set_verbosity(self, logfile=logfile)

        if log:
            self.vprint(1, '')
            self.vprint(1, 'Writing to logfile at {}'.format(self.logfile))
        self.test = test
        self.running = False

        # resolve defaults that depend on test mode
        if self.test:
            if interval is None:
                self.vprint(
                    2, "No interval given, using default for TEST MODE")
                interval = DEFAULT_INTERVAL_TEST

        else:
            if interval is None:
                self.vprint(
                    2, "No interval given, using default for NORMAL MODE")
                interval = DEFAULT_INTERVAL_NORMAL
            if config is None:
                self.vprint(2, "No config file given, " +
                            "attempting to use default config path")
                config = DEFAULT_CONFIG
            if publickey is None:
                self.vprint(2, "No publickey file given, " +
                            "attempting to use default publickey path")
                publickey = DEFAULT_PUBLICKEY

        self.interval = interval

        if self.datalogflag:
            self.vprint(
                1, 'Writing CPM to data log at {}'.format(self.datalog))

        if config:
            try:
                self.config = Config(config,
                                     verbosity=self.v, logfile=self.logfile)
            except IOError:
                raise IOError(
                    'Unable to open config file {}!'.format(config))
        else:
            self.vprint(
                1, 'WARNING: no config file given. Not posting to server')
            self.config = None

        if publickey:
            try:
                self.publickey = PublicKey(
                    publickey, verbosity=self.v, logfile=self.logfile)
            except IOError:
                raise IOError(
                    'Unable to load publickey file {}!'.format(publickey))
        else:
            self.vprint(
                1, 'WARNING: no public key given. Not posting to server')
            self.publickey = None

        self.aes = None     # gets checked in sender. feature in manager_d3s

    def run(self):
        """
        Start counting time.

        This method does NOT return, so run in a subprocess if you
        want to keep control. However, setting self.running = False will stop,
        as will a KeyboardInterrupt.
        """

        this_start, this_end = self.get_interval(time.time())
        self.vprint(
            1, ('Manager is starting to run at {}' +
                ' with intervals of {}s').format(
                datetime_from_epoch(this_start), self.interval))
        self.running = True

        try:
            while self.running:
                self.vprint(3, 'Sleeping at {} until {}'.format(
                    datetime_from_epoch(time.time()),
                    datetime_from_epoch(this_end)))
                try:
                    self.sleep_until(this_end)
                except SleepError:
                    self.vprint(1, 'SleepError: system clock skipped ahead!')
                    # the previous start/end times are meaningless.
                    # There are a couple ways this could be handled.
                    # 1. keep the same start time, but go until time.time()
                    #    - but if there was actually an execution delay,
                    #      the CPM will be too high.
                    # 2. set start time to be time.time() - interval,
                    #    and end time is time.time().
                    #    - but if the system clock was adjusted halfway through
                    #      the interval, the CPM will be too low.
                    # The second one is more acceptable.
                    self.vprint(
                        3, 'former this_start = {}, this_end = {}'.format(
                            datetime_from_epoch(this_start),
                            datetime_from_epoch(this_end)))
                    this_start, this_end = self.get_interval(
                        time.time() - self.interval)

                self.handle_cpm(this_start, this_end)
                if self.quit_after_interval:
                    self.vprint(1, 'Reboot: taking down Manager')
                    self.stop()
                    self.takedown()
                    os.system('sudo {0} {1}'.format(
                        REBOOT_SCRIPT, self.branch))
                this_start, this_end = self.get_interval(this_end)
        except KeyboardInterrupt:
            self.vprint(1, '\nKeyboardInterrupt: stopping Manager run')
            self.stop()
            self.takedown()
        except SystemExit:
            self.vprint(1, '\nSystemExit: taking down Manager')
            self.stop()
            self.takedown()

    def stop(self):
        """Stop counting time."""
        self.running = False

    def sleep_until(self, end_time, retry=True):
        """
        Sleep until the given timestamp.

        Input:
          end_time: number of seconds since epoch, e.g. time.time()
          retry: when True and quit_after_interval is set, sleep once more
            after the first sleep returns

        Raises SleepError if the clock ends up more than 10 s past
        end_time, or before it, without the call having started late.
        """

        catching_up_flag = False
        sleeptime = end_time - time.time()
        self.vprint(3, 'Sleeping for {} seconds'.format(sleeptime))
        if sleeptime < 0:
            # can happen if flushing queue to server takes longer than interval
            sleeptime = 0
            catching_up_flag = True
        time.sleep(sleeptime)
        if self.quit_after_interval and retry:
            # SIGQUIT signal somehow interrupts time.sleep
            # which makes the retry argument needed
            self.sleep_until(end_time, retry=False)
        now = time.time()
        self.vprint(
            2, 'sleep_until offset is {} seconds'.format(now - end_time))
        # normally this offset is < 0.1 s
        # although a reboot normally produces an offset of 9.5 s
        #   on the first cycle
        if not catching_up_flag and (now - end_time > 10 or now < end_time):
            # raspberry pi clock reset during this interval
            # normally the first half of the condition triggers it.
            raise SleepError

    def get_interval(self, start_time):
        """
        Return start and end time for interval, based on given start_time.
        """
        end_time = start_time + self.interval
        return start_time, end_time

    def data_log(self, file, **kwargs):
        """
        Writes cpm to data-log.

        Reads the 'cpm' and 'cpm_err' keyword arguments (None if absent)
        and appends one timestamped line to file, only when datalogflag
        is set.
        """
        time_string = time.strftime("%Y-%m-%d %H:%M:%S")
        cpm, cpm_err = kwargs.get('cpm'), kwargs.get('cpm_err')
        if self.datalogflag:
            with open(file, 'a') as f:
                f.write('{0}, {1}, {2}'.format(time_string, cpm, cpm_err))
                f.write('\n')
                self.vprint(2, 'Writing CPM to data log at {}'.format(file))

    def handle_cpm(self, this_start, this_end):
        """
        Get CPM from sensor, display text, send to server.
        """

        cpm, cpm_err = self.sensor.get_cpm(this_start, this_end)
        # convert counts-per-minute back to raw counts for this interval
        counts = int(round(cpm * self.interval / 60))

        self.data_handler.main(
            self.datalog, cpm, cpm_err, this_start, this_end, counts)

    def takedown(self):
        """Delete self and child objects and clean up GPIO nicely."""

        # sensor
        self.sensor.cleanup()
        del(self.sensor)

        try:
            GPIO.cleanup()
        except NameError:
            # not on a Raspberry Pi so no GPIO
            pass

        # send the rest of the queue object to DEFAULT_DATA_BACKLOG_FILE upon
        # shutdown
        self.data_handler.send_all_to_backlog()

        # self. can I even do this?
        # (this only unbinds the local name; the caller's reference remains)
        del(self)

    @classmethod
    def from_argparse(cls):
        """
        Initialize a Manager instance using arguments from the command line.

        For usage:
        python manager.py -h
        """

        # Note: keep the keywords identical to the keywords in __init__(),
        #   to avoid individual handling of arguments.
        # The arguments with default=None depend on test state.
        #   They are handled in __init__()
        # Also, LED pin numbers could be added here if you want.

        parser = argparse.ArgumentParser(
            description="Manager for the DoseNet radiation detector")
        # test mode
        parser.add_argument(
            '--test', '-t', action='store_true', default=False,
            help='Start in test mode (no config, 30s intervals)')
        # interval: default depends on whether test mode is enabled
        parser.add_argument(
            '--interval', '-i', type=int, default=None,
            help=('Interval of CPM measurement, in seconds' +
                  ' (default 300 for normal mode)'))
        # verbosity
        parser.add_argument(
            '--verbosity', '-v', type=int, default=None,
            help='Verbosity level (0 to 3) (default 1)')
        parser.add_argument(
            '--log', '-l', action='store_true', default=False,
            help='Enable file logging of all verbose text (default off)')
        parser.add_argument(
            '--logfile', '-f', type=str, default=None,
            help='Specify file for logging (default {})'.format(
                DEFAULT_LOGFILE))
        # config file and public key
        parser.add_argument(
            '--config', '-c', default=None,
            help='Specify a config file (default {})'.format(DEFAULT_CONFIG))
        parser.add_argument(
            '--publickey', '-k', default=None,
            help='Specify a publickey file (default {})'.format(
                DEFAULT_PUBLICKEY))
        # server address and port
        parser.add_argument(
            '--hostname', '-s', default=DEFAULT_HOSTNAME,
            help='Specify a server hostname (default {})'.format(
                DEFAULT_HOSTNAME))
        parser.add_argument(
            '--port', '-p', type=int, default=None,
            help='Specify a port for the server ' +
            '(default {} for UDP, {} for TCP)'.format(
                DEFAULT_UDP_PORT, DEFAULT_TCP_PORT))
        parser.add_argument(
            '--sender-mode', '-m', type=str, default=DEFAULT_SENDER_MODE,
            choices=['udp', 'tcp', 'UDP', 'TCP'],
            help='The network protocol used in sending data ' +
            '(default {})'.format(DEFAULT_SENDER_MODE))
        # datalog
        parser.add_argument(
            '--datalog', '-d', default=None,
            help='Specify a path for the datalog (default {})'.format(
                DEFAULT_DATALOG))
        parser.add_argument(
            '--datalogflag', '-a', action='store_true', default=False,
            help='Enable logging local data (default off)')
        # communication protocol
        parser.add_argument(
            '--protocol', '-r', default=DEFAULT_PROTOCOL,
            help='Specify what communication protocol is to be used ' +
            '(default {})'.format(DEFAULT_PROTOCOL))

        args = parser.parse_args()
        arg_dict = vars(args)
        # cls (not a hard-coded Manager) so subclasses get their own type
        mgr = cls(**arg_dict)

        return mgr
def __init__(self,
             network_LED_pin=NETWORK_LED_PIN,
             power_LED_pin=POWER_LED_PIN,
             counts_LED_pin=COUNTS_LED_PIN,
             signal_pin=SIGNAL_PIN,
             noise_pin=NOISE_PIN,
             sender_mode=DEFAULT_SENDER_MODE,
             interval=None,
             config=None,
             publickey=None,
             hostname=DEFAULT_HOSTNAME,
             port=None,
             verbosity=None,
             log=False,
             logfile=None,
             datalog=None,
             datalogflag=False,
             protocol=DEFAULT_PROTOCOL,
             test=None,
             ):
    """
    Resolve the options, then build the LEDs, Sensor, Data_Handler and
    ServerSender, post the boot log message and reload the backlog.

    signal_pin, noise_pin and hostname are accepted but are not used in
    this method.
    """

    self.quit_after_interval = False

    self.protocol = protocol

    self.datalog = datalog
    self.datalogflag = datalogflag

    # These three calls read the two attributes set just above, so they
    # must run after them and in this order.
    self.a_flag()
    self.d_flag()
    self.make_data_log(self.datalog)

    # NOTE(review): self.v and self.logfile are read below but not
    # assigned in this method -- presumably handle_input sets them.
    self.handle_input(log, logfile, verbosity,
                      test, interval, config, publickey)

    self.test = test

    # LEDs
    if RPI:
        self.power_LED = LED(power_LED_pin)
        self.network_LED = LED(network_LED_pin)
        self.counts_LED = LED(counts_LED_pin)

        # power LED is switched on as soon as the LED objects exist
        self.power_LED.on()
    else:
        # RPI is falsy: no LED objects are created
        self.power_LED = None
        self.network_LED = None
        self.counts_LED = None

    # other objects
    self.sensor = Sensor(
        counts_LED=self.counts_LED,
        verbosity=self.v,
        logfile=self.logfile)
    self.data_handler = Data_Handler(
        manager=self,
        verbosity=self.v,
        logfile=self.logfile,
        network_led=self.network_LED)
    self.sender = ServerSender(
        manager=self,
        mode=sender_mode,
        port=port,
        verbosity=self.v,
        logfile=self.logfile)

    # init_log runs only after self.sender has been created
    self.init_log()
    # DEFAULT_UDP_PORT and DEFAULT_TCP_PORT are assigned in sender
    self.branch = ''

    self.data_handler.backlog_to_queue()
class Manager(object):
    """
    Master object for dosimeter operation.

    Initializes other classes, tracks time intervals, and converts the counts
    from Sensor into a CPM to give to the server.

    time_interval is the interval (in seconds) over which CPM is
    calculated.
    """

    # Note: keep the __init__() keywords identical to the keywords in argparse,
    #   in order to avoid unpacking them individually.
    # The None's are handled differently, depending on whether test mode.
    def __init__(self,
                 network_LED_pin=NETWORK_LED_PIN,
                 power_LED_pin=POWER_LED_PIN,
                 counts_LED_pin=COUNTS_LED_PIN,
                 signal_pin=SIGNAL_PIN,
                 noise_pin=NOISE_PIN,
                 sender_mode=DEFAULT_SENDER_MODE,
                 interval=None,
                 config=None,
                 publickey=None,
                 hostname=DEFAULT_HOSTNAME,
                 port=None,
                 verbosity=None,
                 log=False,
                 logfile=None,
                 datalog=None,
                 datalogflag=False,
                 protocol=DEFAULT_PROTOCOL,
                 test=None,
                 ):
        """
        Resolve the options, then build the LEDs, Sensor, Data_Handler and
        ServerSender, post the boot log message and reload the backlog.

        signal_pin, noise_pin and hostname are accepted (to mirror the
        argparse keywords) but are not used in this method.
        """

        self.quit_after_interval = False

        self.protocol = protocol

        self.datalog = datalog
        self.datalogflag = datalogflag

        # Order matters: a_flag/d_flag reconcile the path and the flag
        # before the data log file is touched.
        self.a_flag()
        self.d_flag()
        self.make_data_log(self.datalog)

        self.handle_input(log, logfile, verbosity,
                          test, interval, config, publickey)

        self.test = test

        # LEDs
        if RPI:
            self.power_LED = LED(power_LED_pin)
            self.network_LED = LED(network_LED_pin)
            self.counts_LED = LED(counts_LED_pin)

            self.power_LED.on()
        else:
            self.power_LED = None
            self.network_LED = None
            self.counts_LED = None

        # other objects
        self.sensor = Sensor(
            counts_LED=self.counts_LED,
            verbosity=self.v,
            logfile=self.logfile)
        self.data_handler = Data_Handler(
            manager=self,
            verbosity=self.v,
            logfile=self.logfile,
            network_led=self.network_LED)
        self.sender = ServerSender(
            manager=self,
            mode=sender_mode,
            port=port,
            verbosity=self.v,
            logfile=self.logfile)

        self.init_log()
        # DEFAULT_UDP_PORT and DEFAULT_TCP_PORT are assigned in sender
        self.branch = ''

        self.data_handler.backlog_to_queue()

    def init_log(self):
        """
        Post log message to server regarding Manager startup.

        The message names the git branch and short commit hash found in
        GIT_DIRECTORY. The network LED blinks on a network error and is
        switched on after a successful send.
        """

        # set working directory
        cwd = os.getcwd()
        os.chdir(GIT_DIRECTORY)
        try:
            branch = subprocess.check_output(
                ['git', 'rev-parse', '--abbrev-ref', 'HEAD']).rstrip()
            self.vprint(3, 'Found git branch: {}'.format(branch))
            commit = subprocess.check_output(
                ['git', 'rev-parse', '--short', 'HEAD']).rstrip()
            self.vprint(3, 'Found commit: {}'.format(commit))
        finally:
            # always restore the working directory, even if git fails
            os.chdir(cwd)

        msg_code = BOOT_LOG_CODE
        msg_text = 'Booting on {} at {}'.format(branch, commit)
        self.vprint(1, 'Sending log message: [{}] {}'.format(
            msg_code, msg_text))
        try:
            self.sender.send_log(msg_code, msg_text)
        except (socket.gaierror, socket.error, socket.timeout):
            self.vprint(1, 'Failed to send log message, network error')
            if self.network_LED:
                self.network_LED.start_blink(
                    interval=NETWORK_LED_BLINK_PERIOD_S)
        else:
            self.vprint(2, 'Success sending log message')
            if self.network_LED:
                if self.network_LED.blinker:
                    self.network_LED.stop_blink()
                self.network_LED.on()

    def a_flag(self):
        """
        Checks if the -a from_argparse is called.

        If it is called and no data-log path was given, sets the path of
        the data-log to DEFAULT_DATALOG. An explicitly given path is kept.
        """
        if self.datalogflag and not self.datalog:
            self.datalog = DEFAULT_DATALOG

    def d_flag(self):
        """
        Checks if the -d from_argparse is called.

        If it is called, sets datalogflag to True.
        """
        if self.datalog:
            self.datalogflag = True

    def make_data_log(self, file):
        """Create the data-log file if data logging is enabled."""
        if self.datalogflag:
            # opening in append mode creates the file without truncating it
            with open(file, 'a') as f:
                pass

    def handle_input(self, log, logfile, verbosity,
                     test, interval, config, publickey):
        """
        Resolve logging, verbosity, interval, config and publickey defaults.

        Sets self.logfile, self.v, self.test, self.running, self.interval,
        self.config, self.publickey and self.aes.

        Raises IOError if the config or publickey file cannot be loaded.
        """

        # resolve logging defaults
        if log and logfile is None:
            # use default file if logging is enabled
            logfile = DEFAULT_LOGFILE
        if logfile and not log:
            # enable logging if logfile is specified
            #   (this overrides a log=False input which wouldn't make sense)
            log = True
        if log:
            self.logfile = logfile
        else:
            self.logfile = None

        # set up verbosity
        if verbosity is None:
            if test:
                verbosity = 2
            else:
                verbosity = 1
        self.v = verbosity
        # presumably this is what provides self.vprint -- it is not
        # defined in this class
        set_verbosity(self, logfile=logfile)

        if log:
            self.vprint(1, '')
            self.vprint(1, 'Writing to logfile at {}'.format(self.logfile))
        self.test = test
        self.running = False

        # resolve defaults that depend on test mode
        if self.test:
            if interval is None:
                self.vprint(
                    2, "No interval given, using default for TEST MODE")
                interval = DEFAULT_INTERVAL_TEST

        else:
            if interval is None:
                self.vprint(
                    2, "No interval given, using default for NORMAL MODE")
                interval = DEFAULT_INTERVAL_NORMAL
            if config is None:
                self.vprint(2, "No config file given, " +
                            "attempting to use default config path")
                config = DEFAULT_CONFIG
            if publickey is None:
                self.vprint(2, "No publickey file given, " +
                            "attempting to use default publickey path")
                publickey = DEFAULT_PUBLICKEY

        self.interval = interval

        if self.datalogflag:
            self.vprint(
                1, 'Writing CPM to data log at {}'.format(self.datalog))

        if config:
            try:
                self.config = Config(config,
                                     verbosity=self.v, logfile=self.logfile)
            except IOError:
                raise IOError(
                    'Unable to open config file {}!'.format(config))
        else:
            self.vprint(
                1, 'WARNING: no config file given. Not posting to server')
            self.config = None

        if publickey:
            try:
                self.publickey = PublicKey(
                    publickey, verbosity=self.v, logfile=self.logfile)
            except IOError:
                raise IOError(
                    'Unable to load publickey file {}!'.format(publickey))
        else:
            self.vprint(
                1, 'WARNING: no public key given. Not posting to server')
            self.publickey = None

        self.aes = None     # gets checked in sender. feature in manager_d3s

    def run(self):
        """
        Start counting time.

        This method does NOT return, so run in a subprocess if you
        want to keep control. However, setting self.running = False will stop,
        as will a KeyboardInterrupt.
        """

        this_start, this_end = self.get_interval(time.time())
        self.vprint(
            1, ('Manager is starting to run at {}' +
                ' with intervals of {}s').format(
                datetime_from_epoch(this_start), self.interval))
        self.running = True

        try:
            while self.running:
                self.vprint(3, 'Sleeping at {} until {}'.format(
                    datetime_from_epoch(time.time()),
                    datetime_from_epoch(this_end)))
                try:
                    self.sleep_until(this_end)
                except SleepError:
                    self.vprint(1, 'SleepError: system clock skipped ahead!')
                    # the previous start/end times are meaningless.
                    # There are a couple ways this could be handled.
                    # 1. keep the same start time, but go until time.time()
                    #    - but if there was actually an execution delay,
                    #      the CPM will be too high.
                    # 2. set start time to be time.time() - interval,
                    #    and end time is time.time().
                    #    - but if the system clock was adjusted halfway through
                    #      the interval, the CPM will be too low.
                    # The second one is more acceptable.
                    self.vprint(
                        3, 'former this_start = {}, this_end = {}'.format(
                            datetime_from_epoch(this_start),
                            datetime_from_epoch(this_end)))
                    this_start, this_end = self.get_interval(
                        time.time() - self.interval)

                self.handle_cpm(this_start, this_end)
                if self.quit_after_interval:
                    self.vprint(1, 'Reboot: taking down Manager')
                    self.stop()
                    self.takedown()
                    os.system('sudo {0} {1}'.format(
                        REBOOT_SCRIPT, self.branch))
                this_start, this_end = self.get_interval(this_end)
        except KeyboardInterrupt:
            self.vprint(1, '\nKeyboardInterrupt: stopping Manager run')
            self.stop()
            self.takedown()
        except SystemExit:
            self.vprint(1, '\nSystemExit: taking down Manager')
            self.stop()
            self.takedown()

    def stop(self):
        """Stop counting time."""
        self.running = False

    def sleep_until(self, end_time, retry=True):
        """
        Sleep until the given timestamp.

        Input:
          end_time: number of seconds since epoch, e.g. time.time()
          retry: when True and quit_after_interval is set, sleep once more
            after the first sleep returns

        Raises SleepError if the clock ends up more than 10 s past
        end_time, or before it, without the call having started late.
        """

        catching_up_flag = False
        sleeptime = end_time - time.time()
        self.vprint(3, 'Sleeping for {} seconds'.format(sleeptime))
        if sleeptime < 0:
            # can happen if flushing queue to server takes longer than interval
            sleeptime = 0
            catching_up_flag = True
        time.sleep(sleeptime)
        if self.quit_after_interval and retry:
            # SIGQUIT signal somehow interrupts time.sleep
            # which makes the retry argument needed
            self.sleep_until(end_time, retry=False)
        now = time.time()
        self.vprint(
            2, 'sleep_until offset is {} seconds'.format(now - end_time))
        # normally this offset is < 0.1 s
        # although a reboot normally produces an offset of 9.5 s
        #   on the first cycle
        if not catching_up_flag and (now - end_time > 10 or now < end_time):
            # raspberry pi clock reset during this interval
            # normally the first half of the condition triggers it.
            raise SleepError

    def get_interval(self, start_time):
        """
        Return start and end time for interval, based on given start_time.
        """
        end_time = start_time + self.interval
        return start_time, end_time

    def data_log(self, file, cpm, cpm_err):
        """
        Writes cpm to data-log.

        Appends one timestamped "time, cpm, cpm_err" line to file, only
        when datalogflag is set.
        """
        time_string = time.strftime("%Y-%m-%d %H:%M:%S")
        if self.datalogflag:
            with open(file, 'a') as f:
                f.write('{0}, {1}, {2}'.format(time_string, cpm, cpm_err))
                f.write('\n')
                self.vprint(2, 'Writing CPM to data log at {}'.format(file))

    def handle_cpm(self, this_start, this_end):
        """
        Get CPM from sensor, display text, send to server.
        """

        cpm, cpm_err = self.sensor.get_cpm(this_start, this_end)
        # convert counts-per-minute back to raw counts for this interval
        counts = int(round(cpm * self.interval / 60))

        self.data_handler.main(
            self.datalog, cpm, cpm_err, this_start, this_end, counts)

    def takedown(self):
        """Delete self and child objects and clean up GPIO nicely."""

        # sensor
        self.sensor.cleanup()
        del(self.sensor)

        # power LED
        try:
            self.power_LED.off()
        except AttributeError:
            # no LED
            pass
        try:
            GPIO.cleanup()
        except NameError:
            # not on a Raspberry Pi so no GPIO
            pass

        # send the rest of the queue object to DEFAULT_DATA_BACKLOG_FILE upon
        # shutdown
        self.data_handler.send_all_to_backlog()

        # self. can I even do this?
        # (this only unbinds the local name; the caller's reference remains)
        del(self)

    @classmethod
    def from_argparse(cls):
        """
        Initialize a Manager instance using arguments from the command line.

        For usage:
        python manager.py -h
        """

        # Note: keep the keywords identical to the keywords in __init__(),
        #   to avoid individual handling of arguments.
        # The arguments with default=None depend on test state.
        #   They are handled in __init__()
        # Also, LED pin numbers could be added here if you want.

        parser = argparse.ArgumentParser(
            description="Manager for the DoseNet radiation detector")
        # test mode
        parser.add_argument(
            '--test', '-t', action='store_true', default=False,
            help='Start in test mode (no config, 30s intervals)')
        # interval: default depends on whether test mode is enabled
        parser.add_argument(
            '--interval', '-i', type=int, default=None,
            help=('Interval of CPM measurement, in seconds' +
                  ' (default 300 for normal mode)'))
        # verbosity
        parser.add_argument(
            '--verbosity', '-v', type=int, default=None,
            help='Verbosity level (0 to 3) (default 1)')
        parser.add_argument(
            '--log', '-l', action='store_true', default=False,
            help='Enable file logging of all verbose text (default off)')
        parser.add_argument(
            '--logfile', '-f', type=str, default=None,
            help='Specify file for logging (default {})'.format(
                DEFAULT_LOGFILE))
        # config file and public key
        parser.add_argument(
            '--config', '-c', default=None,
            help='Specify a config file (default {})'.format(DEFAULT_CONFIG))
        parser.add_argument(
            '--publickey', '-k', default=None,
            help='Specify a publickey file (default {})'.format(
                DEFAULT_PUBLICKEY))
        # server address and port
        parser.add_argument(
            '--hostname', '-s', default=DEFAULT_HOSTNAME,
            help='Specify a server hostname (default {})'.format(
                DEFAULT_HOSTNAME))
        parser.add_argument(
            '--port', '-p', type=int, default=None,
            help='Specify a port for the server ' +
            '(default {} for UDP, {} for TCP)'.format(
                DEFAULT_UDP_PORT, DEFAULT_TCP_PORT))
        parser.add_argument(
            '--sender-mode', '-m', type=str, default=DEFAULT_SENDER_MODE,
            choices=['udp', 'tcp', 'UDP', 'TCP'],
            help='The network protocol used in sending data ' +
            '(default {})'.format(DEFAULT_SENDER_MODE))
        # datalog
        parser.add_argument(
            '--datalog', '-d', default=None,
            help='Specify a path for the datalog (default {})'.format(
                DEFAULT_DATALOG))
        parser.add_argument(
            '--datalogflag', '-a', action='store_true', default=False,
            help='Enable logging local data (default off)')
        # communication protocol
        parser.add_argument(
            '--protocol', '-r', default=DEFAULT_PROTOCOL,
            help='Specify what communication protocol is to be used ' +
            '(default {})'.format(DEFAULT_PROTOCOL))

        args = parser.parse_args()
        arg_dict = vars(args)
        # cls (not a hard-coded Manager) so subclasses get their own type
        mgr = cls(**arg_dict)

        return mgr
def __init__(self, sep, port):
    """Wire together the RPC manager, data handler and client thread.

    Parameters
    ----------
    sep
        Stored on the instance as ``self.sep`` and forwarded to both
        ``Data_Handler`` and ``ClientThread``.
    port
        Forwarded to ``ClientThread`` as its first argument.
    """
    self.sep = sep

    # Build the collaborators in dependency order: the data handler needs
    # the RPC manager's ``on_data`` callback, and the client thread needs
    # the data handler.
    rpc_manager = RPC_Manager()
    self.rpc_manager = rpc_manager

    data_handler = Data_Handler(sep, rpc_manager.on_data)
    self.data_handler = data_handler

    self.thread = ClientThread(port, data_handler, sep)

    # Handlers are registered only after every collaborator is in place.
    self.register_handlers()
def get_players_from_particular_state():
    """Delegate to ``dh.players_from_state()``.

    ``dh`` is a module-level object defined outside this function.
    Whatever ``players_from_state`` returns is discarded, so this
    function always returns ``None``.
    """
    dh.players_from_state()
def __init__(self,
             network_LED_pin=NETWORK_LED_PIN,
             counts_LED_pin=COUNTS_LED_PIN,
             signal_pin=SIGNAL_PIN,
             noise_pin=NOISE_PIN,
             sender_mode=DEFAULT_SENDER_MODE,
             interval=None,
             config=None,
             publickey=None,
             hostname=DEFAULT_HOSTNAME,
             port=None,
             verbosity=None,
             log=False,
             logfile=None,
             datalog=None,
             datalogflag=False,
             protocol=DEFAULT_PROTOCOL,
             test=None,
             ):
    """Set up the manager and the objects it coordinates.

    Stores the protocol and datalog settings, runs the flag and input
    handling helpers, creates the LEDs (only when ``RPI`` is truthy),
    then builds the sensor, data handler and sender before starting the
    log and moving any backlog into the queue.

    Parameters
    ----------
    network_LED_pin, counts_LED_pin
        Pins passed to ``LED`` when ``RPI`` is truthy.
    signal_pin, noise_pin, hostname
        Accepted for interface compatibility; not read in this method.
    sender_mode, port
        Forwarded to ``ServerSender`` as ``mode`` and ``port``.
    interval, config, publickey, verbosity, log, logfile, test
        Forwarded to ``self.handle_input``.
    datalog, datalogflag, protocol
        Stored on the instance under the same names.
    """
    self.quit_after_interval = False
    self.protocol = protocol

    # Datalog settings must be on the instance before the flag helpers and
    # ``make_data_log`` run.
    self.datalog = datalog
    self.datalogflag = datalogflag
    self.a_flag()
    self.d_flag()
    self.make_data_log(self.datalog)

    # NOTE(review): ``self.v`` and ``self.logfile`` are read below but not
    # assigned here -- presumably ``handle_input`` sets them; confirm.
    self.handle_input(
        log, logfile, verbosity, test, interval, config, publickey)
    self.test = test

    # LEDs exist only on Raspberry Pi hardware; otherwise use None.
    self.network_LED = LED(network_LED_pin) if RPI else None
    self.counts_LED = LED(counts_LED_pin) if RPI else None

    # Collaborating objects, created in the order sensor -> data handler
    # -> sender.
    self.sensor = Sensor(
        counts_LED=self.counts_LED,
        verbosity=self.v,
        logfile=self.logfile,
    )
    self.data_handler = Data_Handler(
        manager=self,
        verbosity=self.v,
        logfile=self.logfile,
        network_led=self.network_LED,
    )
    self.sender = ServerSender(
        manager=self,
        mode=sender_mode,
        port=port,
        verbosity=self.v,
        logfile=self.logfile,
    )

    self.init_log()
    # DEFAULT_UDP_PORT and DEFAULT_TCP_PORT are assigned in sender

    self.branch = ''

    self.data_handler.backlog_to_queue()
X: numpy.array, shape (n, 2) A two dimensional array containing the coordinates of the embedding. y: numpy.array The labels of the datapoints. title: str A title for the plot. """ ax.axis('off') ax.patch.set_visible(False) colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k'] varieties = y.unique() for i, var in enumerate(varieties): temp = X[y == var] x_temp = temp.iloc[:, [0]] y_temp = temp.iloc[:, [1]] ax.scatter(x_temp, y_temp, color=colors[i], label=var) if __name__ == '__main__': wrangler = Data_Handler('data/cleaned_data.csv') test = wrangler.get_top_num(2) X_test = test['description'] y_test = test['variety'] vecto = CountVectorizer(stop_words='english', max_features=10) X_vect = vecto.fit_transform(X_test) smol = TruncatedSVD(n_components=5) smol_test = smol.fit_transform(X_vect) fig, ax = plt.subplots() scree_plot(ax, smol) plt.show()