Python Data_Handler 예제들, data_handler.Data_Handler Python 예제들

예제 #1

0

파일 보기

def confusion_matrix():
    '''
    Fits a TF-IDF + ComplementNB model on the top 15 varieties and shows the
    confusion matrix for a held-out split, normalized over the true labels.

    The vectorizer and model are hard-coded; swapping either one means
    editing this function by hand.
    '''

    handler = Data_Handler('data/cleaned_data.csv')
    top_df = handler.get_top_num(15)
    stop_list = handler.stop_words

    text_train, text_test, y_train, y_test = train_test_split(
        top_df['description'], top_df['variety'])

    tfidf = TfidfVectorizer(stop_words=stop_list)
    features_train = tfidf.fit_transform(text_train)
    features_test = tfidf.transform(text_test)

    classifier = ComplementNB()
    classifier.fit(features_train, y_train)

    # Fixed display order for the axis labels.
    display_order = [
        'Pinot Noir', 'Cabernet Sauvignon', 'Red Blend',
        'Bordeaux-style Red Blend', 'Syrah', 'Merlot', 'Zinfandel',
        'Sangiovese', 'Malbec', 'Nebbiolo', 'Rosé', 'Chardonnay',
        'Sauvignon Blanc', 'Riesling', 'White Blend'
    ]

    # NOTE(review): plot_confusion_matrix no longer exists in recent
    # scikit-learn releases -- confirm the pinned version before upgrading.
    plot_options = {
        'normalize': 'true',
        'xticks_rotation': 'vertical',
        'labels': display_order,
        'include_values': False,
    }
    plot_confusion_matrix(classifier, features_test, y_test, **plot_options)
    plt.show()

예제 #2

0

파일 보기

파일: model_testing.py 프로젝트: IHetterich/wine-classifier

def pickling():
    '''
    Fits the TF-IDF vectorizer and the ComplementNB model on the top 15
    varieties and pickles both for use in prediction.

    The vectorizer is written to pickles/text_vec.pkl and the model to
    pickles/model.pkl.

    Parameters
    ----------
    None

    Returns
    ----------
    None
    '''

    wrangler = Data_Handler('data/cleaned_data.csv')
    stops = wrangler.stop_words
    df = wrangler.get_top_num(15)
    y = df['variety']

    vecto = TfidfVectorizer(stop_words=stops)
    X = vecto.fit_transform(df['description'])
    # Context managers make sure each pickle file is flushed and closed,
    # even when dumping raises part-way through.
    with open('pickles/text_vec.pkl', 'wb') as vec_file:
        pickle.dump(vecto, vec_file)

    model = ComplementNB()
    model.fit(X, y)
    with open('pickles/model.pkl', 'wb') as model_file:
        pickle.dump(model, model_file)

예제 #3

0

파일 보기

def elmo_data_prep():
    '''
    Takes in data to be vectorized and goes through a cleaning and
    lemmatization process so it plays nicely with ELMo.

    Steps applied to the description column, in order: strip punctuation,
    lower-case, replace digits with spaces, drop stop words, lemmatize.
    The result is written to data/elmo_prepped_data.csv. The start time,
    the head of the prepared frame and the elapsed seconds are printed.
    '''

    start = time.time()
    print(time.asctime(time.localtime(start)))
    wrangler = Data_Handler('data/cleaned_data.csv')
    df = wrangler.get_top_num(15)
    stops = wrangler.stop_words

    # Scrubbing methods
    # Build the lookup set once instead of once per character.
    punctuation = set(',.!"#$%&()*+-/:;<=>?@[\\]^_`{|}~')
    df['description'] = df['description'].apply(
        lambda x: ''.join(ch for ch in str(x) if ch not in punctuation))
    df['description'] = df['description'].str.lower()
    # regex=True is spelled out because pandas 2.0 changed the default to a
    # literal match, which would silently stop removing digits.
    df['description'] = df['description'].str.replace(
        "[0-9]", " ", regex=True)
    df['description'] = df['description'].apply(
        lambda x: ' '.join([word for word in x.split() if word not in stops]))
    df['description'] = lemmatization(df['description'])

    # Saves data to new .csv
    df.to_csv('data/elmo_prepped_data.csv')
    print(df.head())
    print(time.time() - start)

예제 #4

0

파일 보기

파일: run_tfidf.py 프로젝트: Heringer-Epson/TFIDF_example

 def perform_embedding(self):
     """Tokenize the train and test sets with a fresh Data_Handler.

     ``self.Train_X`` and ``self.Test_X`` are each replaced by the output
     of ``Data_Handler.clean_tokenize``. Per the original author's note,
     POS-tagging is enforced (presumably inside ``clean_tokenize``).
     """
     handler = Data_Handler()
     tokenize = handler.clean_tokenize
     self.Train_X = tokenize(self.Train_X)
     self.Test_X = tokenize(self.Test_X)

예제 #5

0

파일 보기

    def perform_embedding(self):
        """Build the embedding weight matrix for the corpus vocabulary.

        Loads the pre-trained GloVe file matching ``self._embedding_dim``
        through ``Data_Handler.load_embedding`` and passes it, together with
        the tokenizer's word index, to ``Data_Handler.get_weight_matrix``.
        The result is stored in ``self.embedding_vectors``.
        """

        handler = Data_Handler()

        # Parse the pre-trained embedding for the configured dimension.
        glove_path = './../data/glove.6B.%dd.txt' % self._embedding_dim
        pretrained = handler.load_embedding(glove_path)

        # Create the matrix of embeddings for the words present in the corpus.
        word_index = self.tokenizer.word_index
        self.embedding_vectors = handler.get_weight_matrix(
            pretrained, word_index)

예제 #6

0

파일 보기

def get_team_members():
    """Print a team's players and let the user re-sort them interactively.

    The team comes from ``dh.teams_info()``. After the initial print the
    user is asked (Y/N) whether to reorder; on 'y' a statistic name is
    requested until ``team.reorder()`` accepts it, then the Y/N question is
    asked again. 'n' ends the function; anything else re-prompts.
    """

    team = dh.teams_info()
    team.print_player_names()

    while True:
        print(
            f"\nThe players are ordered by {team.ordering}. Would you like to"
            f" reorder the players by one of their other statistics? (Y/N): ")
        entry = get_input1()
        if entry == 'n':
            break
        if entry != 'y':
            print("Invalid input. Please try again.")
            continue

        # Keep asking until team.reorder() reports success (per the original
        # note, a successful reorder also prints the players).
        while True:
            print("Players can be reordered by:   "
                  "Last name, Jersey, Position, Salary, Experience, Age, "
                  "Weight, Height")
            # The original line continuation dropped the space after "stat",
            # so the prompt read "statyou".
            print("Type in the name of the stat "
                  "you would like to reorder by: ")
            stat = get_input2()
            if team.reorder(stat):
                break
            print("Input is invalid. Try again.")

예제 #7

0

파일 보기

def word_cloud():
    '''
    Renders a word cloud built from every Chardonnay review description.

    To plot another variety, change the list passed to
    get_certain_varieties() below.
    '''

    handler = Data_Handler('data/cleaned_data.csv')
    # Alternative source left by the original author: handler.full
    reviews = handler.get_certain_varieties(['Chardonnay'])
    corpus = ' '.join(reviews['description'])

    cloud_settings = {
        'stopwords': handler.stop_words,
        'background_color': 'white',
        'width': 500,
        'height': 600,
        'colormap': 'viridis',
    }
    cloud = WordCloud(**cloud_settings).generate(corpus)

    plt.figure(figsize=(6, 6), dpi=250)
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()

예제 #8

0

파일 보기

    def test_reorder_standings(self):
        """Reordering the 2020 standings updates the ordering label and the
        first team listed in each conference."""

        standings = dh.search_team_standings(2020)

        # Ordering by team name: check the label and the first Eastern team.
        standings.reorder('team name')
        self.assertEqual(standings.ordering, 'team name')
        self.assertEqual(standings._eastern_conference[0].city,
                         'Philadelphia')

        # Ordering by percentage: check the first Western team.
        standings.reorder('percentage')
        self.assertEqual(standings._western_conference[0].city,
                         'Los Angeles')

예제 #9

0

파일 보기

def top_x_words(num):
    '''
    Prints out the top num words most highly weighted by the model for each
    of the top 15 varieties.

    A TF-IDF vectorizer (capped at 100 features) and a ComplementNB model
    are fit on the descriptions; for every class the num features with the
    highest feature_log_prob_ are printed.

    Parameters
    ----------
    num - How many tokens to print per variety.

    Returns
    ----------
    None
    '''

    wrangler = Data_Handler('data/cleaned_data.csv')
    pn = wrangler.get_top_num(15)
    stops = wrangler.stop_words
    vectorizer = TfidfVectorizer(max_features=100, stop_words=stops)
    X = vectorizer.fit_transform(pn['description'])
    model = ComplementNB()
    model.fit(X, pn['variety'])

    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2 in favour of get_feature_names_out(); support both.
    if hasattr(vectorizer, 'get_feature_names_out'):
        feature_words = vectorizer.get_feature_names_out()
    else:
        feature_words = vectorizer.get_feature_names()

    for var, name in enumerate(model.classes_):
        print(f"\nTarget: {var}, name: {name}")
        log_prob = model.feature_log_prob_[var]
        # Indices of the num largest log-probabilities, highest first.
        i_topn = np.argsort(log_prob)[::-1][:num]
        features_topn = [feature_words[i] for i in i_topn]
        print(f"Top {num} tokens: ", features_topn)

예제 #10

0

파일 보기

    def _fit(self, data):
        '''
        Trains the recommender's stages on the data found at the given path.

        self.vecto is fit on the descriptions, self.nb on the resulting
        vectors, and self.rf on the class probabilities predicted by self.nb
        for that same training data.

        Parameters
        ----------
        data - The filepath to the data being fit.

        Returns
        ----------
        None
        '''

        frame = Data_Handler(data).get_top_num(15)
        descriptions, varieties = frame['description'], frame['variety']

        doc_vectors = self.vecto.fit_transform(descriptions)
        self.nb.fit(doc_vectors, varieties)

        class_probs = self.nb.predict_proba(doc_vectors)
        self.rf.fit(class_probs, varieties)

예제 #11

0

파일 보기

    def test_ordering_players_based_on_stats(self):
        """After reordering a team by a statistic, the first player in the
        team's list is the expected one."""

        # (team code, ((statistic, expected first player), ...))
        expectations = (
            ('HOU', (("Salary", 'Russell Westbrook'),
                     ("Height", 'Tyson Chandler'))),
            ('LAL', (("Weight", 'JaVale McGee'),
                     ("Age", 'LeBron James'))),
        )

        for team_code, checks in expectations:
            team = dh.get_team(team_code)
            for stat, expected_name in checks:
                team.reorder(stat)
                first_player = team._players[0]
                self.assertEqual(first_player.full_name, expected_name)

예제 #12

0

파일 보기

    def test_print_out_player(self):
        """str() of Houston's first player after ordering by salary matches
        the expected multi-line summary."""

        team = dh.get_team('HOU')
        team.reorder("Salary")
        top_paid_player = team._players[0]

        summary = (
            "Russell Westbrook of the Houston Rockets"
            " is 31 years old."
            "\nHis team plays in both the Western conference and the Southwest division."
            "\nHe is 190.5cm tall and weighs 90.9kg."
            "\nHis salary is $38,506,482 and he has been playing in"
            " the NBA for 12 years.\n"
            "\nFurther player details:\n")

        # Each detail is left-aligned in a 40-character cell.
        detail_cells = (
            "\nBirthday: 12/11/1988",
            "Birth City: Long Beach",
            "\nBirth State: California",
            "College: UCLA",
            "\nPosition: PG",
            "Jersey Number: 0",
        )
        details = ''.join('{:<40}'.format(cell) for cell in detail_cells)

        self.assertEqual(str(top_paid_player), summary + details)

예제 #13

0

파일 보기

def graph_top_num(num):
    '''
    Graphs the number of reviews for the top num varieties in the dataset.

    Varieties and their counts are read from Data_Handler.freq_dict and
    drawn as a bar chart, most reviewed first. If num is larger than the
    number of varieties available, every variety is plotted.

    Parameters
    ----------
    num - How many of the most reviewed varieties to plot.

    Returns
    ----------
    None
    '''

    data_handler = Data_Handler('data/cleaned_data.csv')
    varietals = list(data_handler.freq_dict.keys())
    counts = list(data_handler.freq_dict.values())
    sort_idx = np.argsort(counts)
    # Walk the ascending order backwards to get the num largest counts.
    top_idx = sort_idx[-1:-(num + 1):-1]

    top_varietals = [varietals[idx] for idx in top_idx]
    top_counts = [counts[idx] for idx in top_idx]

    fig, ax = plt.subplots(figsize=(15, 15), dpi=100)
    # One bar position per value actually selected; using range(num) here
    # mismatched the bar heights whenever num exceeded the variety count.
    x = list(range(len(top_counts)))
    ax.bar(x, top_counts, tick_label=top_varietals)
    ax.set_xlabel('Varieties', fontsize=20)
    ax.set_ylabel('Reviews', fontsize=20)
    ax.set_title(f'Top {num} Reviewed Varieties', fontsize=25)
    plt.xticks(rotation=65, fontsize=15)
    plt.tight_layout()
    plt.show()

예제 #14

0

파일 보기

def order_standings(season_year):
    """Show a season's standings and let the user re-sort them interactively.

    The standings come from ``dh.search_team_standings(season_year)``. After
    the initial display the user is asked (Y/N) whether to reorder; on 'y' a
    column name is requested until ``reorder()`` accepts it, the standings
    are shown again and the Y/N question repeats. 'n' ends the function;
    anything else re-prompts.
    """

    standings = dh.search_team_standings(season_year)
    standings.get_standings()

    while True:
        print(f"\nThe standings are ordered by {standings.ordering}. "
              "Would you like to reorder the standings by one of the "
              "other columns? (Y/N): ")
        answer = get_input3()
        if answer == 'n':
            break
        if answer != 'y':
            print("Invalid input. Please try again.")
            continue

        # Keep asking until reorder() accepts the column name.
        while True:
            print("Type in the name of the column "
                  "you would like to reorder by: ")
            chosen = get_input4()
            if standings.reorder(chosen):
                standings.get_standings()
                break
            print("Input is invalid. Try again.")

예제 #15

0

파일 보기

파일: model_testing.py 프로젝트: IHetterich/wine-classifier

    pickle.dump(model, m)


if __name__ == '__main__':
    '''
    Due to the nature of the functions here I haven't instantiated argparse.
    Honestly opening this file up and directly changing the parameter dictionary
    seems more straightforward and easier than having to pass in 10 values
    on the command line, especially when juggling multiple hyper parameters.

    Keeping whatever was found best in the last grid search as the only values.
    '''

    # params = {'max_features': [None], 'n-grams': [(1,1)]}

    wrangler = Data_Handler('data/cleaned_data.csv')
    stops = wrangler.stop_words
    df = wrangler.get_top_num(15)
    y = df['variety'].to_numpy()
    # X = df['description'].to_numpy()

    # Load the precomputed ELMo vectors; the context manager closes the file
    # even if unpickling fails.
    # NOTE: pickle.load can execute arbitrary code -- only load files this
    # project produced itself.
    with open('data/elmo_vectors.pkl', 'rb') as vectors_file:
        elmo = pickle.load(vectors_file)

    model_testing(stops, elmo, y)
    # vectorizer_hyper_test(params, stops, X, y)
    # vectorizer_hyper_test(params, stops, X, y)

예제 #16

0

파일 보기

 def setUp(self):
     """Import the team, player and past-two-seasons data through dh."""
     loaders = (
         dh.import_team_data,
         dh.import_player_data,
         dh.import_past_two_seasons_data,
     )
     for load in loaders:
         load()

예제 #17

0

파일 보기

from data_handler import Data_Handler as dh

# The data must be imported from the API before the functions below can use
# it; these three calls run as soon as this module is loaded.
dh.import_team_data()
dh.import_player_data()
dh.import_past_two_seasons_data()


def get_input1():
    """Read one line from standard input and return it lower-cased."""
    answer = input()
    return answer.lower()


def get_input2():
    """Read one line from standard input and return it lower-cased."""
    answer = input()
    return answer.lower()


def get_input3():
    """Read one line from standard input and return it lower-cased."""
    answer = input()
    return answer.lower()


def get_input4():
    """Read one line from standard input and return it lower-cased."""
    answer = input()
    return answer.lower()


def menu():
    print("\nMain menu: \n\
1 - Search for the statistics of an active player in the NBA\n\
2 - View the team standings for the 2018/2019 season \n\
3 - View the team standings for the 2019/2020 season \n\
4 - View the current players of a particular team\n\

예제 #18

0

파일 보기

파일: manager.py 프로젝트: ziruijiang/dosenet-raspberrypi

class Manager(object):
    """
    Master object for dosimeter operation.

    Initializes other classes, tracks time intervals, and converts the counts
    from Sensor into a CPM to give to the server.

    interval is the interval (in seconds) over which CPM is calculated.
    """

    # Note: keep the __init__() keywords identical to the keywords in argparse,
    #   in order to avoid unpacking them individually.
    # The None's are handled differently, depending on whether test mode.
    def __init__(self,
                 network_LED_pin=NETWORK_LED_PIN,
                 counts_LED_pin=COUNTS_LED_PIN,
                 signal_pin=SIGNAL_PIN,
                 noise_pin=NOISE_PIN,
                 sender_mode=DEFAULT_SENDER_MODE,
                 interval=None,
                 config=None,
                 publickey=None,
                 hostname=DEFAULT_HOSTNAME,
                 port=None,
                 verbosity=None,
                 log=False,
                 logfile=None,
                 datalog=None,
                 datalogflag=False,
                 protocol=DEFAULT_PROTOCOL,
                 test=None,
                 ):
        """
        Resolve the input options, then build the LEDs, Sensor, Data_Handler
        and ServerSender, post the boot log message and queue any backlog.

        signal_pin, noise_pin and hostname are accepted (to mirror the
        argparse keywords) but are not used inside this constructor.
        """

        self.quit_after_interval = False

        self.protocol = protocol

        self.datalog = datalog
        self.datalogflag = datalogflag

        # Order matters: a_flag() may set the datalog path, d_flag() may then
        # turn the flag on, and make_data_log() reads both.
        self.a_flag()
        self.d_flag()
        self.make_data_log(self.datalog)

        self.handle_input(log, logfile, verbosity,
                          test, interval, config, publickey)

        self.test = test

        # LEDs
        if RPI:
            self.network_LED = LED(network_LED_pin)
            self.counts_LED = LED(counts_LED_pin)
        else:
            self.network_LED = None
            self.counts_LED = None

        # other objects
        self.sensor = Sensor(
            counts_LED=self.counts_LED,
            verbosity=self.v,
            logfile=self.logfile)
        self.data_handler = Data_Handler(
            manager=self,
            verbosity=self.v,
            logfile=self.logfile,
            network_led=self.network_LED)
        self.sender = ServerSender(
            manager=self,
            mode=sender_mode,
            port=port,
            verbosity=self.v,
            logfile=self.logfile)

        self.init_log()
        # DEFAULT_UDP_PORT and DEFAULT_TCP_PORT are assigned in sender
        self.branch = ''

        self.data_handler.backlog_to_queue()

    def init_log(self):
        """
        Post log message to server regarding Manager startup.

        The message carries the git branch and short commit hash reported by
        git when run from GIT_DIRECTORY. A network error while sending is
        reported through vprint (and a blinking network LED, if present)
        rather than raised.
        """

        # Run git from GIT_DIRECTORY, and always restore the previous working
        # directory -- even if a git command fails.
        cwd = os.getcwd()
        os.chdir(GIT_DIRECTORY)
        try:
            # universal_newlines=True makes check_output return text, so the
            # messages below do not show a bytes repr under Python 3.
            branch = subprocess.check_output(
                ['git', 'rev-parse', '--abbrev-ref', 'HEAD'],
                universal_newlines=True).rstrip()
            self.vprint(3, 'Found git branch: {}'.format(branch))
            commit = subprocess.check_output(
                ['git', 'rev-parse', '--short', 'HEAD'],
                universal_newlines=True).rstrip()
            self.vprint(3, 'Found commit: {}'.format(commit))
        finally:
            os.chdir(cwd)

        msg_code = BOOT_LOG_CODE
        msg_text = 'Booting on {} at {}'.format(branch, commit)
        self.vprint(1, 'Sending log message: [{}] {}'.format(
            msg_code, msg_text))
        try:
            self.sender.send_log(msg_code, msg_text)
        except (socket.gaierror, socket.error, socket.timeout):
            self.vprint(1, 'Failed to send log message, network error')
            if self.network_LED:
                self.network_LED.start_blink(
                    interval=NETWORK_LED_BLINK_PERIOD_S)
        else:
            self.vprint(2, 'Success sending log message')
            if self.network_LED:
                if self.network_LED.blinker:
                    self.network_LED.stop_blink()
                self.network_LED.on()

    def a_flag(self):
        """
        Checks if the -a from_argparse is called.

        If it is called, sets the path of the data-log to
        DEFAULT_DATALOG.
        """
        # NOTE(review): this also replaces a path given with -d when both
        # -a and -d are supplied -- confirm that is intended.
        if self.datalogflag:
            self.datalog = DEFAULT_DATALOG

    def d_flag(self):
        """
        Checks if the -d from_argparse is called.

        If it is called, sets datalogflag to True.
        """
        if self.datalog:
            self.datalogflag = True

    def make_data_log(self, file):
        """
        Create the data-log file if data logging is enabled.

        Opening in append mode creates the file when it is missing and
        leaves existing contents untouched.
        """
        if self.datalogflag:
            with open(file, 'a'):
                pass

    def handle_input(self,
                     log, logfile, verbosity,
                     test, interval, config, publickey):
        """
        Resolve defaults for the logging, verbosity, interval, config and
        publickey options and store the results on the instance.

        Sets self.logfile, self.v, self.test, self.running, self.interval,
        self.config, self.publickey and self.aes.

        Raises IOError if the config or publickey file cannot be loaded.
        """

        # resolve logging defaults
        if log and logfile is None:
            # use default file if logging is enabled
            logfile = DEFAULT_LOGFILE
        if logfile and not log:
            # enable logging if logfile is specified
            #   (this overrides a log=False input which wouldn't make sense)
            log = True
        if log:
            self.logfile = logfile
        else:
            self.logfile = None

        # set up verbosity
        if verbosity is None:
            if test:
                verbosity = 2
            else:
                verbosity = 1
        self.v = verbosity
        # self.vprint is used from here on; presumably set_verbosity
        # installs it on this instance -- confirm in its definition.
        set_verbosity(self, logfile=logfile)

        if log:
            self.vprint(1, '')
            self.vprint(1, 'Writing to logfile at {}'.format(self.logfile))
        self.test = test
        self.running = False

        # resolve defaults that depend on test mode
        if self.test:
            if interval is None:
                self.vprint(
                    2, "No interval given, using default for TEST MODE")
                interval = DEFAULT_INTERVAL_TEST

        else:
            if interval is None:
                self.vprint(
                    2, "No interval given, using default for NORMAL MODE")
                interval = DEFAULT_INTERVAL_NORMAL
            if config is None:
                self.vprint(2, "No config file given, " +
                            "attempting to use default config path")
                config = DEFAULT_CONFIG
            if publickey is None:
                self.vprint(2, "No publickey file given, " +
                            "attempting to use default publickey path")
                publickey = DEFAULT_PUBLICKEY

        self.interval = interval

        if self.datalogflag:
            self.vprint(
                1, 'Writing CPM to data log at {}'.format(self.datalog))

        if config:
            try:
                self.config = Config(config,
                                     verbosity=self.v, logfile=self.logfile)
            except IOError:
                raise IOError(
                    'Unable to open config file {}!'.format(config))
        else:
            self.vprint(
                1, 'WARNING: no config file given. Not posting to server')
            self.config = None

        if publickey:
            try:
                self.publickey = PublicKey(
                    publickey, verbosity=self.v, logfile=self.logfile)
            except IOError:
                raise IOError(
                    'Unable to load publickey file {}!'.format(publickey))
        else:
            self.vprint(
                1, 'WARNING: no public key given. Not posting to server')
            self.publickey = None

        self.aes = None     # gets checked in sender. feature in manager_d3s

    def run(self):
        """
        Start counting time.

        This method does NOT return, so run in a subprocess if you
        want to keep control.

        However, setting self.running = False will stop, as will a
          KeyboardInterrupt.
        """

        this_start, this_end = self.get_interval(time.time())
        self.vprint(
            1, ('Manager is starting to run at {}' +
                ' with intervals of {}s').format(
                datetime_from_epoch(this_start), self.interval))
        self.running = True

        try:
            while self.running:
                self.vprint(3, 'Sleeping at {} until {}'.format(
                    datetime_from_epoch(time.time()),
                    datetime_from_epoch(this_end)))
                try:
                    self.sleep_until(this_end)
                except SleepError:
                    self.vprint(1, 'SleepError: system clock skipped ahead!')
                    # the previous start/end times are meaningless.
                    # There are a couple ways this could be handled.
                    # 1. keep the same start time, but go until time.time()
                    #    - but if there was actually an execution delay,
                    #      the CPM will be too high.
                    # 2. set start time to be time.time() - interval,
                    #    and end time is time.time().
                    #    - but if the system clock was adjusted halfway through
                    #      the interval, the CPM will be too low.
                    # The second one is more acceptable.
                    self.vprint(
                        3, 'former this_start = {}, this_end = {}'.format(
                            datetime_from_epoch(this_start),
                            datetime_from_epoch(this_end)))
                    this_start, this_end = self.get_interval(
                        time.time() - self.interval)

                self.handle_cpm(this_start, this_end)
                if self.quit_after_interval:
                    self.vprint(1, 'Reboot: taking down Manager')
                    self.stop()
                    self.takedown()
                    os.system('sudo {0} {1}'.format(
                        REBOOT_SCRIPT, self.branch))
                this_start, this_end = self.get_interval(this_end)
        except KeyboardInterrupt:
            self.vprint(1, '\nKeyboardInterrupt: stopping Manager run')
            self.stop()
            self.takedown()
        except SystemExit:
            self.vprint(1, '\nSystemExit: taking down Manager')
            self.stop()
            self.takedown()

    def stop(self):
        """Stop counting time."""
        self.running = False

    def sleep_until(self, end_time, retry=True):
        """
        Sleep until the given timestamp.

        Input:

          end_time: number of seconds since epoch, e.g. time.time()
          retry: when True and quit_after_interval is set, sleep once more
            after the first sleep returns (see the comment below).

        Raises SleepError when the wake-up time is more than 10 s late or is
        before end_time, unless end_time had already passed on entry.
        """

        catching_up_flag = False
        sleeptime = end_time - time.time()
        self.vprint(3, 'Sleeping for {} seconds'.format(sleeptime))
        if sleeptime < 0:
            # can happen if flushing queue to server takes longer than interval
            sleeptime = 0
            catching_up_flag = True
        time.sleep(sleeptime)
        if self.quit_after_interval and retry:
            # SIGQUIT signal somehow interrupts time.sleep
            # which makes the retry argument needed
            self.sleep_until(end_time, retry=False)
        now = time.time()
        self.vprint(
            2, 'sleep_until offset is {} seconds'.format(now - end_time))
        # normally this offset is < 0.1 s
        # although a reboot normally produces an offset of 9.5 s
        #   on the first cycle
        if not catching_up_flag and (now - end_time > 10 or now < end_time):
            # raspberry pi clock reset during this interval
            # normally the first half of the condition triggers it.
            raise SleepError

    def get_interval(self, start_time):
        """
        Return start and end time for interval, based on given start_time.
        """
        end_time = start_time + self.interval
        return start_time, end_time

    def data_log(self, file, **kwargs):
        """
        Writes cpm to data-log.

        Appends one "timestamp, cpm, cpm_err" line to file when data logging
        is enabled. cpm and cpm_err are read from the keyword arguments and
        are None when not supplied.
        """
        time_string = time.strftime("%Y-%m-%d %H:%M:%S")
        cpm, cpm_err = kwargs.get('cpm'), kwargs.get('cpm_err')
        if self.datalogflag:
            with open(file, 'a') as f:
                f.write('{0}, {1}, {2}'.format(time_string, cpm, cpm_err))
                f.write('\n')
                self.vprint(2, 'Writing CPM to data log at {}'.format(file))

    def handle_cpm(self, this_start, this_end):
        """
        Get CPM from sensor, display text, send to server.

        The count total for the interval is reconstructed from the CPM and
        passed, with the CPM and its error, to the data handler.
        """
        cpm, cpm_err = self.sensor.get_cpm(this_start, this_end)
        counts = int(round(cpm * self.interval / 60))
        self.data_handler.main(
            self.datalog, cpm, cpm_err, this_start, this_end, counts)

    def takedown(self):
        """Release the sensor and GPIO, then flush the queue to the backlog."""

        # sensor
        self.sensor.cleanup()
        del self.sensor

        try:
            GPIO.cleanup()
        except NameError:
            # not on a Raspberry Pi so no GPIO
            pass

        # send the rest of the queue object to DEFAULT_DATA_BACKLOG_FILE upon
        #   shutdown
        self.data_handler.send_all_to_backlog()

        # The former trailing `del(self)` only unbound the local name and had
        # no effect on the object, so it has been dropped.

    @classmethod
    def from_argparse(cls):
        """
        Initialize a Manager instance using arguments from the command line.

        For usage:
        python manager.py -h
        """

        # Note: keep the keywords identical to the keywords in __init__(),
        #   to avoid individual handling of arguments.
        # The arguments with default=None depend on test state.
        #   They are handled in __init__()
        # Also, LED pin numbers could be added here if you want.

        parser = argparse.ArgumentParser(
            description="Manager for the DoseNet radiation detector")
        # test mode
        parser.add_argument(
            '--test', '-t', action='store_true', default=False,
            help='Start in test mode (no config, 30s intervals)')
        # interval: default depends on whether test mode is enabled
        parser.add_argument(
            '--interval', '-i', type=int, default=None,
            help=('Interval of CPM measurement, in seconds' +
                  ' (default 300 for normal mode)'))
        # verbosity
        parser.add_argument(
            '--verbosity', '-v', type=int, default=None,
            help='Verbosity level (0 to 3) (default 1)')
        parser.add_argument(
            '--log', '-l', action='store_true', default=False,
            help='Enable file logging of all verbose text (default off)')
        parser.add_argument(
            '--logfile', '-f', type=str, default=None,
            help='Specify file for logging (default {})'.format(
                DEFAULT_LOGFILE))
        # config file and public key
        parser.add_argument(
            '--config', '-c', default=None,
            help='Specify a config file (default {})'.format(DEFAULT_CONFIG))
        parser.add_argument(
            '--publickey', '-k', default=None,
            help='Specify a publickey file (default {})'.format(
                DEFAULT_PUBLICKEY))
        # server address and port
        parser.add_argument(
            '--hostname', '-s', default=DEFAULT_HOSTNAME,
            help='Specify a server hostname (default {})'.format(
                DEFAULT_HOSTNAME))
        parser.add_argument(
            '--port', '-p', type=int, default=None,
            help='Specify a port for the server ' +
            '(default {} for UDP, {} for TCP)'.format(
                DEFAULT_UDP_PORT, DEFAULT_TCP_PORT))
        parser.add_argument(
            '--sender-mode', '-m', type=str, default=DEFAULT_SENDER_MODE,
            choices=['udp', 'tcp', 'UDP', 'TCP'],
            help='The network protocol used in sending data ' +
            '(default {})'.format(DEFAULT_SENDER_MODE))

        # datalog
        parser.add_argument(
            '--datalog', '-d', default=None,
            help='Specify a path for the datalog (default {})'.format(
                DEFAULT_DATALOG))
        parser.add_argument(
            '--datalogflag', '-a', action='store_true', default=False,
            help='Enable logging local data (default off)')

        # communication protocol
        parser.add_argument(
            '--protocol', '-r', default=DEFAULT_PROTOCOL,
            help='Specify what communication protocol is to be used ' +
            '(default {})'.format(DEFAULT_PROTOCOL))

        args = parser.parse_args()
        arg_dict = vars(args)
        # Build through cls so a subclass calling from_argparse() gets an
        # instance of itself rather than a plain Manager.
        mgr = cls(**arg_dict)

        return mgr

예제 #19

0

파일 보기

파일: manager.py 프로젝트: tybtab/dosenet-raspberrypi

    def __init__(self,
                 network_LED_pin=NETWORK_LED_PIN,
                 power_LED_pin=POWER_LED_PIN,
                 counts_LED_pin=COUNTS_LED_PIN,
                 signal_pin=SIGNAL_PIN,
                 noise_pin=NOISE_PIN,
                 sender_mode=DEFAULT_SENDER_MODE,
                 interval=None,
                 config=None,
                 publickey=None,
                 hostname=DEFAULT_HOSTNAME,
                 port=None,
                 verbosity=None,
                 log=False,
                 logfile=None,
                 datalog=None,
                 datalogflag=False,
                 protocol=DEFAULT_PROTOCOL,
                 test=None,
                 ):
        """
        Resolve the input options, then build the LEDs, Sensor, Data_Handler
        and ServerSender, call init_log() and queue any backlog.

        signal_pin, noise_pin and hostname are accepted but are not used
        inside this constructor.
        """

        self.quit_after_interval = False

        self.protocol = protocol

        self.datalog = datalog
        self.datalogflag = datalogflag

        # Called in this order on purpose: the flag helpers run before
        # make_data_log(), which receives the resulting self.datalog.
        self.a_flag()
        self.d_flag()
        self.make_data_log(self.datalog)

        # self.v and self.logfile are read below; presumably handle_input()
        # sets them -- its definition is not visible here, confirm.
        self.handle_input(log, logfile, verbosity,
                          test, interval, config, publickey)

        self.test = test

        # LEDs
        # Real LED objects only when RPI is truthy; otherwise None.
        if RPI:
            self.power_LED = LED(power_LED_pin)
            self.network_LED = LED(network_LED_pin)
            self.counts_LED = LED(counts_LED_pin)

            # The power LED is switched on as soon as it exists.
            self.power_LED.on()
        else:
            self.power_LED = None
            self.network_LED = None
            self.counts_LED = None

        # other objects
        self.sensor = Sensor(
            counts_LED=self.counts_LED,
            verbosity=self.v,
            logfile=self.logfile)
        self.data_handler = Data_Handler(
            manager=self,
            verbosity=self.v,
            logfile=self.logfile,
            network_led=self.network_LED)
        self.sender = ServerSender(
            manager=self,
            mode=sender_mode,
            port=port,
            verbosity=self.v,
            logfile=self.logfile)

        # init_log() runs only after self.sender has been created.
        self.init_log()
        # DEFAULT_UDP_PORT and DEFAULT_TCP_PORT are assigned in sender
        self.branch = ''

        self.data_handler.backlog_to_queue()

예제 #20

0

파일 보기

파일: manager.py 프로젝트: tybtab/dosenet-raspberrypi

class Manager(object):
    """
    Master object for dosimeter operation.

    Initializes other classes, tracks time intervals, and converts the counts
    from Sensor into a CPM to give to the server.

    interval is the length of time (in seconds) over which CPM is
    calculated.
    """

    # Note: keep the __init__() keywords identical to the keywords in argparse,
    #   in order to avoid unpacking them individually.
    # Arguments that default to None are resolved differently depending on
    #   whether test mode is enabled.
    def __init__(self,
                 network_LED_pin=NETWORK_LED_PIN,
                 power_LED_pin=POWER_LED_PIN,
                 counts_LED_pin=COUNTS_LED_PIN,
                 signal_pin=SIGNAL_PIN,
                 noise_pin=NOISE_PIN,
                 sender_mode=DEFAULT_SENDER_MODE,
                 interval=None,
                 config=None,
                 publickey=None,
                 hostname=DEFAULT_HOSTNAME,
                 port=None,
                 verbosity=None,
                 log=False,
                 logfile=None,
                 datalog=None,
                 datalogflag=False,
                 protocol=DEFAULT_PROTOCOL,
                 test=None,
                 ):
        """
        Build the Manager and the helper objects it owns.

        In order: store the protocol and data-log settings, resolve the
        remaining inputs through handle_input(), create the LED objects
        (only when RPI is truthy), construct the Sensor, Data_Handler and
        ServerSender, post the startup message with init_log(), and finally
        call backlog_to_queue() on the data handler.

        Input:
          network_LED_pin, power_LED_pin, counts_LED_pin: passed to LED()
            when RPI is truthy.
          sender_mode, port: forwarded to ServerSender.
          interval, config, publickey, verbosity, log, logfile, test:
            forwarded to handle_input(), which resolves the None defaults.
          datalog, datalogflag: data-log path and enable flag, reconciled by
            a_flag() and d_flag().
          protocol: stored as self.protocol.

        NOTE(review): signal_pin, noise_pin and hostname are accepted but are
        not referenced anywhere in this method -- confirm whether they are
        consumed elsewhere or can be retired.
        """

        # read by run() after every interval and by sleep_until()
        self.quit_after_interval = False

        self.protocol = protocol

        self.datalog = datalog
        self.datalogflag = datalogflag

        # reconcile the data-log path with its enable flag, then make sure
        # the file exists before anything tries to append to it
        self.a_flag()
        self.d_flag()
        self.make_data_log(self.datalog)

        # sets self.logfile, self.v, self.test, self.running, self.interval,
        # self.config, self.publickey and self.aes
        self.handle_input(log, logfile, verbosity,
                          test, interval, config, publickey)

        # handle_input() has already stored the same value
        self.test = test

        # LEDs
        if RPI:
            self.power_LED = LED(power_LED_pin)
            self.network_LED = LED(network_LED_pin)
            self.counts_LED = LED(counts_LED_pin)

            self.power_LED.on()
        else:
            # no hardware: the None placeholders are checked before use
            # (see init_log and takedown)
            self.power_LED = None
            self.network_LED = None
            self.counts_LED = None

        # other objects
        # Data_Handler and ServerSender receive this Manager itself, so the
        # attributes set above must exist before they are constructed.
        self.sensor = Sensor(
            counts_LED=self.counts_LED,
            verbosity=self.v,
            logfile=self.logfile)
        self.data_handler = Data_Handler(
            manager=self,
            verbosity=self.v,
            logfile=self.logfile,
            network_led=self.network_LED)
        self.sender = ServerSender(
            manager=self,
            mode=sender_mode,
            port=port,
            verbosity=self.v,
            logfile=self.logfile)

        # needs self.sender, so it has to come after the block above
        self.init_log()
        # DEFAULT_UDP_PORT and DEFAULT_TCP_PORT are assigned in sender
        # passed by run() as the argument to REBOOT_SCRIPT
        self.branch = ''

        self.data_handler.backlog_to_queue()

    def init_log(self):
        """
        Post a log message to the server announcing Manager startup.

        The message reports the git branch and the short commit hash of the
        checkout in GIT_DIRECTORY.  A network failure while sending is not
        fatal: it is reported through vprint and, when a network LED exists,
        the LED is set blinking.  On success the LED is switched to steady on
        (any running blink is stopped first).

        Raises:
          subprocess.CalledProcessError: a git command exited non-zero.
          OSError: git could not be started or GIT_DIRECTORY is not usable.
        """

        # Run git with cwd=GIT_DIRECTORY instead of os.chdir(): the working
        # directory of this process is never changed, so it cannot be left
        # pointing at the repository when a git call raises.
        # universal_newlines=True makes check_output return text rather than
        # bytes, which keeps a b'...' repr out of the messages on Python 3.
        branch = subprocess.check_output(
            ['git', 'rev-parse', '--abbrev-ref', 'HEAD'],
            cwd=GIT_DIRECTORY, universal_newlines=True).rstrip()
        self.vprint(3, 'Found git branch: {}'.format(branch))
        commit = subprocess.check_output(
            ['git', 'rev-parse', '--short', 'HEAD'],
            cwd=GIT_DIRECTORY, universal_newlines=True).rstrip()
        self.vprint(3, 'Found commit: {}'.format(commit))

        msg_code = BOOT_LOG_CODE
        msg_text = 'Booting on {} at {}'.format(branch, commit)
        self.vprint(1, 'Sending log message: [{}] {}'.format(
            msg_code, msg_text))
        try:
            self.sender.send_log(msg_code, msg_text)
        except (socket.gaierror, socket.error, socket.timeout):
            self.vprint(1, 'Failed to send log message, network error')
            if self.network_LED:
                # blinking network LED signals the failed send
                self.network_LED.start_blink(
                    interval=NETWORK_LED_BLINK_PERIOD_S)
        else:
            self.vprint(2, 'Success sending log message')
            if self.network_LED:
                # stop an earlier failure blink before going steady
                if self.network_LED.blinker:
                    self.network_LED.stop_blink()
                self.network_LED.on()

    def a_flag(self):
        """
        Checks if the -a from_argparse is called.

        If it is called, sets the path of the data-log to
        DEFAULT_DATALOG.
        """
        if self.datalogflag:
            self.datalog = DEFAULT_DATALOG

    def d_flag(self):
        """
        Checks if the -d from_argparse is called.

        If it is called, sets datalogflag to True.
        """
        if self.datalog:
            self.datalogflag = True

    def make_data_log(self, file):
        if self.datalogflag:
            with open(file, 'a') as f:
                pass

    def handle_input(self,
                     log, logfile, verbosity,
                     test, interval, config, publickey):
        """
        Turn the raw constructor inputs into instance attributes.

        Sets self.logfile, self.v, self.test, self.running, self.interval,
        self.config, self.publickey and self.aes.  Inputs left as None are
        replaced by defaults; which defaults apply depends on test mode
        (outside test mode, config and publickey fall back to their default
        paths as well).

        Raises:
          IOError: the config or publickey file could not be loaded.
        """

        # --- logging ---
        if logfile is None and log:
            # logging requested without a path: use the default file
            logfile = DEFAULT_LOGFILE
        if not log and logfile:
            # a logfile on its own implies logging
            #   (a log=False input alongside it would not make sense)
            log = True
        self.logfile = logfile if log else None

        # --- verbosity ---
        if verbosity is None:
            verbosity = 2 if test else 1
        self.v = verbosity
        set_verbosity(self, logfile=logfile)

        if log:
            self.vprint(1, '')
            self.vprint(1, 'Writing to logfile at {}'.format(self.logfile))
        self.test = test
        self.running = False

        # --- defaults that depend on test mode ---
        if interval is None:
            if self.test:
                self.vprint(
                    2, "No interval given, using default for TEST MODE")
                interval = DEFAULT_INTERVAL_TEST
            else:
                self.vprint(
                    2, "No interval given, using default for NORMAL MODE")
                interval = DEFAULT_INTERVAL_NORMAL
        if not self.test:
            # only normal mode falls back to the default config / key paths
            if config is None:
                self.vprint(2, "No config file given, "
                            "attempting to use default config path")
                config = DEFAULT_CONFIG
            if publickey is None:
                self.vprint(2, "No publickey file given, "
                            "attempting to use default publickey path")
                publickey = DEFAULT_PUBLICKEY

        self.interval = interval

        if self.datalogflag:
            self.vprint(
                1, 'Writing CPM to data log at {}'.format(self.datalog))

        # --- config file ---
        if not config:
            self.vprint(
                1, 'WARNING: no config file given. Not posting to server')
            self.config = None
        else:
            try:
                self.config = Config(
                    config, verbosity=self.v, logfile=self.logfile)
            except IOError:
                raise IOError(
                    'Unable to open config file {}!'.format(config))

        # --- public key ---
        if not publickey:
            self.vprint(
                1, 'WARNING: no public key given. Not posting to server')
            self.publickey = None
        else:
            try:
                self.publickey = PublicKey(
                    publickey, verbosity=self.v, logfile=self.logfile)
            except IOError:
                raise IOError(
                    'Unable to load publickey file {}!'.format(publickey))

        self.aes = None     # gets checked in sender. feature in manager_d3s

    def run(self):
        """
        Measure CPM interval after interval until told to stop.

        Each pass sleeps until the end of the current interval, hands the
        interval to handle_cpm(), then advances to the next one.  The call
        blocks for as long as self.running stays True, so run it in a
        subprocess if the caller needs to keep control.

        The loop ends when self.running becomes False (see stop()), on
        KeyboardInterrupt or on SystemExit; the two exceptions also trigger
        takedown().  When self.quit_after_interval is set, the Manager is
        taken down after the current interval and REBOOT_SCRIPT is invoked
        with self.branch.
        """

        interval_start, interval_end = self.get_interval(time.time())
        startup_msg = ('Manager is starting to run at {}' +
                       ' with intervals of {}s').format(
            datetime_from_epoch(interval_start), self.interval)
        self.vprint(1, startup_msg)
        self.running = True

        try:
            while self.running:
                sleeping_msg = 'Sleeping at {} until {}'.format(
                    datetime_from_epoch(time.time()),
                    datetime_from_epoch(interval_end))
                self.vprint(3, sleeping_msg)
                try:
                    self.sleep_until(interval_end)
                except SleepError:
                    self.vprint(1, 'SleepError: system clock skipped ahead!')
                    # The old start/end times no longer mean anything.
                    # Option 1: keep the start and end at time.time() --
                    #   but a genuine execution delay would inflate the CPM.
                    # Option 2: use the window [time.time() - interval,
                    #   time.time()] -- a clock change halfway through the
                    #   interval then gives a CPM that is too low.
                    # Option 2 is the more acceptable error, so use it.
                    former_msg = (
                        'former this_start = {}, this_end = {}'.format(
                            datetime_from_epoch(interval_start),
                            datetime_from_epoch(interval_end)))
                    self.vprint(3, former_msg)
                    interval_start, interval_end = self.get_interval(
                        time.time() - self.interval)

                self.handle_cpm(interval_start, interval_end)
                if self.quit_after_interval:
                    self.vprint(1, 'Reboot: taking down Manager')
                    self.stop()
                    self.takedown()
                    reboot_cmd = 'sudo {0} {1}'.format(
                        REBOOT_SCRIPT, self.branch)
                    os.system(reboot_cmd)
                interval_start, interval_end = self.get_interval(
                    interval_end)
        except KeyboardInterrupt:
            self.vprint(1, '\nKeyboardInterrupt: stopping Manager run')
            self.stop()
            self.takedown()
        except SystemExit:
            self.vprint(1, '\nSystemExit: taking down Manager')
            self.stop()
            self.takedown()

    def stop(self):
        """Stop counting time."""
        self.running = False

    def sleep_until(self, end_time, retry=True):
        """
        Sleep until the given timestamp.

        Input:
          end_time: number of seconds since epoch, e.g. time.time()
        """

        catching_up_flag = False
        sleeptime = end_time - time.time()
        self.vprint(3, 'Sleeping for {} seconds'.format(sleeptime))
        if sleeptime < 0:
            # can happen if flushing queue to server takes longer than interval
            sleeptime = 0
            catching_up_flag = True
        time.sleep(sleeptime)
        if self.quit_after_interval and retry:
            # SIGQUIT signal somehow interrupts time.sleep
            # which makes the retry argument needed
            self.sleep_until(end_time, retry=False)
        now = time.time()
        self.vprint(
            2, 'sleep_until offset is {} seconds'.format(now - end_time))
        # normally this offset is < 0.1 s
        # although a reboot normally produces an offset of 9.5 s
        #   on the first cycle
        if not catching_up_flag and (now - end_time > 10 or now < end_time):
            # raspberry pi clock reset during this interval
            # normally the first half of the condition triggers it.
            raise SleepError

    def get_interval(self, start_time):
        """
        Return start and end time for interval, based on given start_time.
        """
        end_time = start_time + self.interval
        return start_time, end_time

    def data_log(self, file, cpm, cpm_err):
        """
        Writes cpm to data-log.
        """
        time_string = time.strftime("%Y-%m-%d %H:%M:%S")
        if self.datalogflag:
            with open(file, 'a') as f:
                f.write('{0}, {1}, {2}'.format(time_string, cpm, cpm_err))
                f.write('\n')
                self.vprint(2, 'Writing CPM to data log at {}'.format(file))

    def handle_cpm(self, this_start, this_end):
        """
        Get CPM from sensor, display text, send to server.
        """
        cpm, cpm_err = self.sensor.get_cpm(this_start, this_end)
        counts = int(round(cpm * self.interval / 60))
        self.data_handler.main(
            self.datalog, cpm, cpm_err, this_start, this_end, counts)

    def takedown(self):
        """Delete self and child objects and clean up GPIO nicely."""

        # sensor
        self.sensor.cleanup()
        del(self.sensor)

        # power LED
        try:
            self.power_LED.off()
        except AttributeError:
            # no LED
            pass
        try:
            GPIO.cleanup()
        except NameError:
            # not on a Raspberry Pi so no GPIO
            pass

        # send the rest of the queue object to DEFAULT_DATA_BACKLOG_FILE upon
        #   shutdown
        self.data_handler.send_all_to_backlog()

        # self. can I even do this?
        del(self)

    @classmethod
    def from_argparse(cls):
        """
        Initialize an instance of this class from command-line arguments.

        The parsed arguments are passed to the constructor as keyword
        arguments, so every option name here must match an __init__()
        keyword.  The instance is built with `cls`, so calling this on a
        subclass returns an instance of that subclass.

        For usage:
        python manager.py -h
        """

        # Note: keep the keywords identical to the keywords in __init__(),
        #   to avoid individual handling of arguments.
        # The arguments with default=None depend on test state.
        #   They are resolved in handle_input(), called from __init__().
        # Also, LED pin numbers could be added here if you want.

        parser = argparse.ArgumentParser(
            description="Manager for the DoseNet radiation detector")
        # test mode
        parser.add_argument(
            '--test', '-t', action='store_true', default=False,
            help='Start in test mode (no config, 30s intervals)')
        # interval: default depends on whether test mode is enabled
        parser.add_argument(
            '--interval', '-i', type=int, default=None,
            help=('Interval of CPM measurement, in seconds' +
                  ' (default 300 for normal mode)'))
        # verbosity
        parser.add_argument(
            '--verbosity', '-v', type=int, default=None,
            help='Verbosity level (0 to 3) (default 1)')
        parser.add_argument(
            '--log', '-l', action='store_true', default=False,
            help='Enable file logging of all verbose text (default off)')
        parser.add_argument(
            '--logfile', '-f', type=str, default=None,
            help='Specify file for logging (default {})'.format(
                DEFAULT_LOGFILE))
        # config file and public key
        parser.add_argument(
            '--config', '-c', default=None,
            help='Specify a config file (default {})'.format(DEFAULT_CONFIG))
        parser.add_argument(
            '--publickey', '-k', default=None,
            help='Specify a publickey file (default {})'.format(
                DEFAULT_PUBLICKEY))
        # server address and port
        parser.add_argument(
            '--hostname', '-s', default=DEFAULT_HOSTNAME,
            help='Specify a server hostname (default {})'.format(
                DEFAULT_HOSTNAME))
        parser.add_argument(
            '--port', '-p', type=int, default=None,
            help='Specify a port for the server ' +
            '(default {} for UDP, {} for TCP)'.format(
                DEFAULT_UDP_PORT, DEFAULT_TCP_PORT))
        # argparse stores --sender-mode under the key 'sender_mode'
        parser.add_argument(
            '--sender-mode', '-m', type=str, default=DEFAULT_SENDER_MODE,
            choices=['udp', 'tcp', 'UDP', 'TCP'],
            help='The network protocol used in sending data ' +
            '(default {})'.format(DEFAULT_SENDER_MODE))

        # datalog
        parser.add_argument(
            '--datalog', '-d', default=None,
            help='Specify a path for the datalog (default {})'.format(
                DEFAULT_DATALOG))
        parser.add_argument(
            '--datalogflag', '-a', action='store_true', default=False,
            help='Enable logging local data (default off)')

        # communication protocol
        parser.add_argument(
            '--protocol', '-r', default=DEFAULT_PROTOCOL,
            help='Specify what communication protocol is to be used ' +
            '(default {})'.format(DEFAULT_PROTOCOL))

        args = parser.parse_args()
        arg_dict = vars(args)
        # use cls rather than a hard-coded class name so that subclasses
        # inherit a working alternate constructor
        mgr = cls(**arg_dict)

        return mgr

예제 #21

0

파일 보기

 def __init__(self, sep, port):
     """
     Wire together the RPC manager, data handler and client thread.

     sep is stored and passed on to both the Data_Handler and the
     ClientThread; port is passed to the ClientThread.  The Data_Handler
     is given the RPC manager's on_data as its callback.  Handlers are
     registered last, once every collaborator exists.
     """
     self.sep = sep
     rpc = RPC_Manager()
     self.rpc_manager = rpc
     handler = Data_Handler(sep, rpc.on_data)
     self.data_handler = handler
     self.thread = ClientThread(port, handler, sep)
     self.register_handlers()

예제 #22

0

파일 보기

def get_players_from_particular_state():
    """
    Delegate to dh.players_from_state().

    Whatever that call returns is discarded, so this function always
    returns None.
    NOTE(review): dh is a module-level object defined outside this
    snippet -- presumably a Data_Handler instance; confirm.
    """
    dh.players_from_state()

예제 #23

0

파일 보기

파일: manager.py 프로젝트: ziruijiang/dosenet-raspberrypi

    def __init__(self,
                 network_LED_pin=NETWORK_LED_PIN,
                 counts_LED_pin=COUNTS_LED_PIN,
                 signal_pin=SIGNAL_PIN,
                 noise_pin=NOISE_PIN,
                 sender_mode=DEFAULT_SENDER_MODE,
                 interval=None,
                 config=None,
                 publickey=None,
                 hostname=DEFAULT_HOSTNAME,
                 port=None,
                 verbosity=None,
                 log=False,
                 logfile=None,
                 datalog=None,
                 datalogflag=False,
                 protocol=DEFAULT_PROTOCOL,
                 test=None,
                 ):
        """
        Build the manager and the helper objects it owns.

        In order: store the protocol and data-log settings, run a_flag(),
        d_flag() and make_data_log(), pass the remaining inputs to
        handle_input(), create the network and counts LED objects (only
        when RPI is truthy), construct the Sensor, Data_Handler and
        ServerSender, call init_log(), and finally call backlog_to_queue()
        on the data handler.

        Input:
          network_LED_pin, counts_LED_pin: passed to LED() when RPI is
            truthy.
          sender_mode, port: forwarded to ServerSender.
          interval, config, publickey, verbosity, log, logfile, test:
            forwarded to handle_input().
          datalog, datalogflag: stored as self.datalog / self.datalogflag.
          protocol: stored as self.protocol.

        NOTE(review): signal_pin, noise_pin and hostname are accepted but are
        not referenced anywhere in this method -- confirm whether they are
        still needed.
        """

        self.quit_after_interval = False

        self.protocol = protocol

        self.datalog = datalog
        self.datalogflag = datalogflag

        # these helpers are defined outside this snippet; presumably they
        # reconcile the data-log path with its flag and create the file --
        # TODO confirm
        self.a_flag()
        self.d_flag()
        self.make_data_log(self.datalog)

        # self.v and self.logfile are read below but never assigned in this
        # method, so handle_input() is presumably what sets them -- confirm
        self.handle_input(log, logfile, verbosity,
                          test, interval, config, publickey)

        self.test = test

        # LEDs
        if RPI:
            self.network_LED = LED(network_LED_pin)
            self.counts_LED = LED(counts_LED_pin)
        else:
            # no hardware: keep None placeholders
            self.network_LED = None
            self.counts_LED = None

        # other objects
        # Data_Handler and ServerSender receive this manager itself, so the
        # attributes set above must exist before they are constructed.
        self.sensor = Sensor(
            counts_LED=self.counts_LED,
            verbosity=self.v,
            logfile=self.logfile)
        self.data_handler = Data_Handler(
            manager=self,
            verbosity=self.v,
            logfile=self.logfile,
            network_led=self.network_LED)
        self.sender = ServerSender(
            manager=self,
            mode=sender_mode,
            port=port,
            verbosity=self.v,
            logfile=self.logfile)

        self.init_log()
        # DEFAULT_UDP_PORT and DEFAULT_TCP_PORT are assigned in sender
        self.branch = ''

        self.data_handler.backlog_to_queue()

예제 #24

0

파일 보기

    X: numpy.array, shape (n, 2)
      A two dimensional array containing the coordinates of the embedding.
      
    y: numpy.array
      The labels of the datapoints.
      
    title: str
      A title for the plot.
    """
    ax.axis('off')
    ax.patch.set_visible(False)
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
    varieties = y.unique()
    for i, var in enumerate(varieties):
      temp = X[y == var]
      x_temp = temp.iloc[:, [0]]
      y_temp = temp.iloc[:, [1]]
      ax.scatter(x_temp, y_temp, color=colors[i], label=var)

# Script entry point: vectorize the descriptions of the top 2 varieties,
# reduce them with truncated SVD and show a scree plot of the result.
if __name__ == '__main__':
  wrangler = Data_Handler('data/cleaned_data.csv')
  # get_top_num(2) is used elsewhere on this page to select the top-N
  # varieties; presumably it returns a DataFrame -- TODO confirm
  test = wrangler.get_top_num(2)
  X_test = test['description']
  # NOTE(review): y_test is not used anywhere below
  y_test = test['variety']
  # bag-of-words counts, English stop words removed, at most 10 features
  vecto = CountVectorizer(stop_words='english', max_features=10)
  X_vect = vecto.fit_transform(X_test)
  # keep 5 SVD components
  smol = TruncatedSVD(n_components=5)
  smol_test = smol.fit_transform(X_vect)
  fig, ax = plt.subplots()
  # scree_plot is defined outside this snippet; it receives the axes and
  # the fitted TruncatedSVD object
  scree_plot(ax, smol)
  plt.show()