def __init__(self, number_file_read=2):
    """Preprocess data for EDA and model building."""
    data = DataCollection()
    dict_ = data.read_file(number_file_read)

    # Convert the data from a dict to a pandas DataFrame
    self.df = self.convert_to_data_frame(dict_)

    # Dictionaries for encoding string values to numeric codes for model building
    self.country_code_dict = {}
    self.town_dict = {}
    self.sector_dict = {}
    self.theme_dict = {}
    self.geo_level_dict = {}
    self.activity_dict = {}
    self.repayment_interval_dict = {}
    self.status_dic = {}

    # Lists for decoding numeric values back to string values (i.e. country, activity, etc.)
    self.country_code_list = list(self.df.country_code.unique())
    self.town_list = list(self.df.town.unique())
    self.sector_list = list(self.df.sector.unique())
    self.theme_list = list(self.df.theme.unique())
    self.geo_level_list = list(self.df.geo_level.unique())
    self.activity_list = list(self.df.activity.unique())
    self.repayment_interval_list = list(self.df.repayment_interval.unique())
    self.status_list = list(self.df.status.unique())

    # Fill the dictionaries used to encode string values
    self.fill_dictionarys()
    self.change_all_variable()
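# The helper methods called above (fill_dictionarys, change_all_variable) are defined
# elsewhere in the class and are not shown here. A minimal sketch of what
# fill_dictionarys() might look like, assuming each dictionary simply maps a category
# string to its index in the corresponding *_list (this is an assumption, not the
# original implementation):
def fill_dictionarys(self):
    self.country_code_dict = {value: index for index, value in enumerate(self.country_code_list)}
    self.town_dict = {value: index for index, value in enumerate(self.town_list)}
    self.sector_dict = {value: index for index, value in enumerate(self.sector_list)}
    self.theme_dict = {value: index for index, value in enumerate(self.theme_list)}
    self.geo_level_dict = {value: index for index, value in enumerate(self.geo_level_list)}
    self.activity_dict = {value: index for index, value in enumerate(self.activity_list)}
    self.repayment_interval_dict = {value: index for index, value in enumerate(self.repayment_interval_list)}
    self.status_dic = {value: index for index, value in enumerate(self.status_list)}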
def start_proc(dir_seed):
    print(dir_seed[1])
    rows = ROWS
    cols = COLS
    num_plant_types = PlantType().num_plant_types
    depth = num_plant_types + 3  # +1 for 'earth' type, +1 for water, +1 for health
    sector_rows = SECTOR_ROWS
    sector_cols = SECTOR_COLS
    prune_window_rows = PRUNE_WINDOW_ROWS
    prune_window_cols = PRUNE_WINDOW_COLS
    garden_step = STEP

    action_low = 0
    action_high = 1
    obs_low = 0
    obs_high = rows * cols

    garden_days = 100
    sector_obs_per_day = int(NUM_PLANTS + PERCENT_NON_PLANT_CENTERS * NUM_PLANTS)
    collection_time_steps = sector_obs_per_day * garden_days  # sectors observed per garden day * number of garden days
    water_threshold = 1.0

    data_collection = DataCollection()
    data_collection.evaluate_policy(
        data_collection.init_env(rows, cols, depth, sector_rows, sector_cols,
                                 prune_window_rows, prune_window_cols,
                                 action_low, action_high, obs_low, obs_high,
                                 collection_time_steps, garden_step,
                                 num_plant_types, dir_seed[0], dir_seed[1]),
        analytic_policy.policy, collection_time_steps, sector_rows, sector_cols,
        prune_window_rows, prune_window_cols, garden_step, water_threshold,
        sector_obs_per_day)
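# A minimal sketch of how start_proc might be driven over several (directory, seed)
# pairs with a process pool; the directory names, seed values, and the use of
# multiprocessing.Pool here are assumptions and not part of the original snippet.
if __name__ == '__main__':
    import multiprocessing

    dir_seeds = [('data/run_0', 0), ('data/run_1', 1), ('data/run_2', 2)]  # hypothetical paths and seeds
    with multiprocessing.Pool(processes=len(dir_seeds)) as pool:
        pool.map(start_proc, dir_seeds)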
def begin_collection(self):
    user = self.get_active_user()
    mode = self.get_active_mode()
    print('''You are ready to start a data collection session {0}.
\nIn this session you will be presented with {1} automatically generated "password(s)".
\nYour task is to simply type each password as it is presented. If you make a mistake, do not worry; just keep typing until you hit the correct key. Take your time and remember to concentrate!'''.format(user, Constants.SESSION_ITERATIONS))
    muse = self.start_stream()
    input('\nPress any key to begin...')
    datacollection = DataCollection(user, mode, Constants.SESSION_ITERATIONS, self.museID)
    datacollection.start()
    self.stop_stream(muse)
def feature_extraction_pipeline(exp_names):
    """Implements a segmentation-preprocessing-feature_extraction pipeline."""
    segments = []
    for exp_name in exp_names:
        data_collection = DataCollection(exp_name)
        data_collection.load()
        segments.extend(data_collection.segment())
    print("Loaded segments")

    preprocessed_segments = preprocess.preprocess_segments(segments)
    print("Preprocessed segments")

    features = feature_extraction.extract_features(preprocessed_segments)
    print("Extracted features")

    labels = np.array([segment.label for segment in segments])
    return features, labels
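# A minimal usage sketch, assuming hypothetical experiment names and that the
# returned features/labels are plain arrays suitable for scikit-learn; the
# RandomForestClassifier choice is an illustration, not the project's model.
if __name__ == '__main__':
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split

    features, labels = feature_extraction_pipeline(['exp_01', 'exp_02'])  # hypothetical experiment names
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=0)
    clf = RandomForestClassifier(n_estimators=100, random_state=0)
    clf.fit(X_train, y_train)
    print("Held-out accuracy:", clf.score(X_test, y_test))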
def main(arguments):
    # print('__main__')
    options = Options.from_arguments(arguments)
    if options.collecting_data:
        data_collection = DataCollection(options)
        data_collection.collect_data()
    elif options.display_data:
        display(options)
    elif options.test:
        print('running tests...')
        suite = unittest.TestSuite()
        for t in [test.split('.')[0] for test in glob.glob('test_*.py')]:
            try:
                # If the module defines a suite() function, call it to get the suite.
                mod = __import__(t, globals(), locals(), ['suite'])
                suitefn = getattr(mod, 'suite')
                suite.addTest(suitefn())
            except (ImportError, AttributeError):
                # Otherwise, just load all the test cases from the module.
                suite.addTest(unittest.defaultTestLoader.loadTestsFromName(t))
        unittest.TextTestRunner().run(suite)
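# A minimal sketch of an entry point for main(); forwarding sys.argv[1:] is an
# assumption, since the snippet does not show how Options.from_arguments expects
# to receive the command-line arguments.
if __name__ == '__main__':
    import sys
    main(sys.argv[1:])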
from data_collection import DataCollection
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as mpl

if __name__ == '__main__':
    usingDATA = DataCollection("E2", "G421", False)

    # Data analysis (frequency counts)
    COUNT = Counter(usingDATA)
    word_count = list()
    words = list()
    for p, q in COUNT.most_common(21):
        dic = {'word': p, 'count': q}
        word_count.append(dic)
        words.append(dic['word'])

    length = 1160
    NumOfCount = 1
    print("=" * 40)
    for k in word_count:
        try:
            print(
                f"{NumOfCount}. {k['word']:>6} | {k['count']:<3} | {k['count'] * 100 / length:0.2f}%"
            )
            NumOfCount = NumOfCount + 1
        except TypeError:
            continue
    print("=" * 40)
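# The WordCloud and matplotlib imports above are unused in this snippet. A minimal
# sketch of rendering the same frequency counts as a word cloud; the size and
# background_color arguments are assumptions, and Korean text would additionally
# need a font_path pointing at a font that supports Hangul.
cloud = WordCloud(width=800, height=600, background_color='white')
cloud = cloud.generate_from_frequencies(dict(COUNT.most_common(21)))
mpl.figure(figsize=(8, 6))
mpl.imshow(cloud, interpolation='bilinear')
mpl.axis('off')
mpl.show()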
"""Code to be run on RPi while collecting data. Usage: python3 rpi_collect.py host_ip port """ import sys from data_collection import DataCollection import sensor_data from clientconnect import recv_data # from dummy_data import recv_data if __name__ == '__main__': data_collection = DataCollection(experiment_dir=sys.argv[1]) try: for unpacked_data in recv_data(): data_collection.process(unpacked_data[:12]) except KeyboardInterrupt: # Use second argument as label for data data_collection.label = [int(sys.argv[2])] data_collection.save()
from data_collection import DataCollection
import sys
import matplotlib.pyplot as plt
import intrinio

from data_analysis import DataAnalysis

# Intrinio credentials (redacted)
USERNAME = '******'
PASSWORD = '******'

data_collection = DataCollection(username=USERNAME, password=PASSWORD)

ticker_symbol = "AAPL"
dates, prices = data_collection.retrieve_data(ticker_symbol)

da = DataAnalysis()
# data = da.get_yearly_trends(dates, prices, [3])
bounds, sell_dates, profits = da.get_optimal_bounds(dates, prices,
                                                    low_bound_min=0.3,
                                                    top_bound_max=0.5,
                                                    interval=0.01,
                                                    investment=100,
                                                    purchase_strategy="immediate")
da.plot_data(dates, prices, ticker_symbol, sell_dates=sell_dates)

# if sys.argv[1]:
#     ticker = sys.argv[1]
#     start_date = sys.argv[2]
#     results = data_collection.get_prices(ticker, start_date)
#     data_collection.plot_data(ticker)
#     data_collection.save_data(ticker)
from data_collection import DataCollection

DataCollection()
def __execute_data_collection(self, filename_output, dataHandle,
                              document_input_list, debug_message, document_type):
    collection_success = False
    error_document = None
    has_error = False
    try:
        collection_attempts = 0
        while collection_success is False and collection_attempts < self.max_attempts:
            prefix_str = "(Re)" if collection_attempts > 0 else " "
            a_message = "{}{}".format(prefix_str, debug_message)
            print("\n")
            print(a_message, '\tDate and time: ', datetime.now(), flush=True)

            proxy_info = self.__get_proxy()
            instaloaderInstance = localinstaloader.Instaloader(proxies=proxy_info)
            if document_type == "posts_hashtag":
                instaloaderInstance.login(user=self.instagram_user,
                                          passwd=self.instagram_passwd)

            dataCollection = DataCollection(
                filename_output=filename_output,
                dataHandle=dataHandle,
                instaloaderInstance=instaloaderInstance,
                instaloaderClass=localinstaloader,
                document_type=document_type)

            if proxy_info is None:
                print("\t!!!WARNING!!!: This collection is not using a proxy.")
            else:
                proxy_alias = proxy_info["https"].split("@")[1]
                print("\tUsing proxy:", proxy_alias)

            documents_collected = 0
            for document_input in document_input_list:
                documents_collected += 1
                if document_type == "profiles_posts":
                    print("\tCollecting profile of user {}".format(document_input),
                          '\tDate and time: ', datetime.now(), flush=True)
                    has_error, error_document = dataCollection.collectProfile(
                        username=document_input)
                elif document_type == "posts_profile":
                    print("\tCollecting posts of user {} {}/{}".format(
                        document_input["nome_do_usuario"], documents_collected,
                        len(document_input_list)),
                          '\tDate and time: ', datetime.now(), "\n", flush=True)
                    has_error, error_document = dataCollection.collectPosts(
                        data_min=self.min_date,
                        data_max=self.max_date,
                        post_limit=self.max_posts,
                        username=document_input['nome_do_usuario'],
                        hashtag=None)
                elif document_type == "posts_hashtag":
                    print("\tCollecting posts for hashtag {}".format(document_input),
                          '\tDate and time: ', datetime.now(), "\n", flush=True)
                    has_error, error_document = dataCollection.collectPosts(
                        data_min=self.min_date,
                        data_max=self.max_date,
                        post_limit=self.max_posts,
                        username=None,
                        hashtag=document_input)
                elif document_type == "media":
                    print("\tCollecting media of post {} {}/{}".format(
                        document_input['identificador'], documents_collected,
                        len(document_input_list)),
                          '\tDate and time: ', datetime.now(), flush=True)
                    has_error, error_document = dataCollection.downloadPostMedia(
                        post_id=document_input['identificador'],
                        media_url=document_input['identificador_midia'])
                elif document_type == "comments_profile" or document_type == "comments_hashtag":
                    print("\tCollecting comments of post {} {}/{}".format(
                        document_input['identificador'], documents_collected,
                        len(document_input_list)),
                          '\tDate and time: ', datetime.now(), flush=True)
                    has_error, error_document = dataCollection.collectComments(
                        post_id=document_input['identificador'],
                        comments_by_post_limit=self.max_comments,
                        line_debug_number=1000)
                elif document_type == "profiles_comments":
                    print("\tCollecting profile of user {} {}/{}".format(
                        document_input['nome_do_usuario'], documents_collected,
                        len(document_input_list)),
                          '\tDate and time: ', datetime.now(), flush=True)
                    has_error, error_document = dataCollection.collectProfile(
                        username=document_input['nome_do_usuario'])
                else:
                    print("Collection type not recognized. Terminating script...")
                    sys.exit(1)

                if has_error is True:
                    if "429" in error_document:
                        print("Too many requests made recently. Error:", error_document)
                        collection_attempts += 1
                        collection_success = False
                        break
                else:
                    collection_success = True
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print("\nThe collection process will be terminated due to an error. The error: ", e,
              '\tDetails: ', exc_type, fname, exc_tb.tb_lineno,
              '\tDate and time: ', datetime.now(), flush=True)
        error_document = self.__getErrorDocument(exception_obj=e,
                                                 exc_type=exc_type,
                                                 exc_tb=exc_tb)
        self.__create_error_file(
            filename_output=self.filename_unified_data_file,
            error_document=error_document)
        print("Terminating script.")
        sys.exit(1)
    finally:
        if has_error is True:
            print("{}{}".format(
                "\nThe collection process will be terminated due to an error. The error: ",
                error_document), flush=True)
            self.__create_error_file(
                filename_output=self.filename_unified_data_file,
                error_document=error_document)
            sys.exit(1)
def main():
    simulateCamera = False
    simulateSensors = False
    simulateActuators = False

    ard = Arduino()

    global vi
    if simulateCamera:
        vi = VisionInterfaceDummy()
    else:
        vi = VisionInterface()

    if simulateSensors:
        si = SensorInterfaceDummy(ard)
    else:
        si = SensorInterface(ard)

    global ctl
    if simulateActuators:
        ctl = ControlDummy(ard)
    else:
        ctl = Control(ard)

    dc = DataCollection(vi, si)

    # actions
    action_fw = FollowWallAction(ctl)
    action_hb = HuntBallAction(ctl)
    action_cb = CaptureBallAction(ctl)
    action_eb = EmergencyReverseAction(ctl)
    action_rp = RotateInPlaceAction(ctl)
    actionLookup = {
        "ACTION_FOLLOW_WALL": action_fw,
        "ACTION_HUNT_BALL": action_hb,
        "ACTION_CAPTURE_BALL": action_cb,
        "ACTION_EMERGENCY_REVERSE": action_eb,
        "ACTION_ROTATE_IN_PLACE": action_rp,
    }

    # goals
    goal_ex = ExploreGoal()
    goal_gb = GetBallGoal()
    goalLookup = {
        "GOAL_EXPLORE": goal_ex,
        "GOAL_GET_BALL": goal_gb,
    }

    tLast = time.time()
    tAvg = 0

    if not (simulateSensors and simulateActuators):
        ard.run()

    currentGoal = goal_ex

    signal.signal(signal.SIGALRM, alarm_handler)
    signal.alarm(180)

    ctl.ballCaptureOn()

    while True:
        data = dc.get()
        if log:
            print("Data: ")
            print(data)
            print("Goal: " + currentGoal.getName())
        (nextGoalName, actionName, actionArgs) = currentGoal.step(data)
        currentGoal = goalLookup[nextGoalName]
        if log:
            print("Action: " + actionName)
        action = actionLookup[actionName]
        if actionArgs is None:
            action.step()
        else:
            action.step(actionArgs)

        # Keep a running average of the loop time to report FPS
        tCurr = time.time()
        tDiff = tCurr - tLast
        tLast = tCurr
        tAvg = 0.9 * tAvg + 0.1 * tDiff
        print(str(1 / tAvg) + " FPS")
        print("")