def __init__(self, number_file_read=2):
    """Preprocess data for EDA and model building."""
    data = DataCollection()
    dict_ = data.read_file(number_file_read)

    # Convert the data from a dict to a pandas DataFrame
    self.df = self.convert_to_data_frame(dict_)

    # Dictionaries for encoding string values to numeric codes for model building
    self.country_code_dict = {}
    self.town_dict = {}
    self.sector_dict = {}
    self.theme_dict = {}
    self.geo_level_dict = {}
    self.activity_dict = {}
    self.repayment_interval_dict = {}
    self.status_dic = {}

    # Lists for decoding numeric values back to string values (i.e. country, activity, etc.)
    self.country_code_list = list(self.df.country_code.unique())
    self.town_list = list(self.df.town.unique())
    self.sector_list = list(self.df.sector.unique())
    self.theme_list = list(self.df.theme.unique())
    self.geo_level_list = list(self.df.geo_level.unique())
    self.activity_list = list(self.df.activity.unique())
    self.repayment_interval_list = list(self.df.repayment_interval.unique())
    self.status_list = list(self.df.status.unique())

    # Fill the dictionaries used to encode string values
    self.fill_dictionarys()
    self.change_all_variable()
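# The helper methods called above (fill_dictionarys, change_all_variable) are defined
# elsewhere in the class and are not shown here. A minimal sketch of what
# fill_dictionarys() might look like, assuming each dictionary simply maps a category
# string to its index in the corresponding *_list (this is an assumption, not the
# original implementation):
def fill_dictionarys(self):
    self.country_code_dict = {value: index for index, value in enumerate(self.country_code_list)}
    self.town_dict = {value: index for index, value in enumerate(self.town_list)}
    self.sector_dict = {value: index for index, value in enumerate(self.sector_list)}
    self.theme_dict = {value: index for index, value in enumerate(self.theme_list)}
    self.geo_level_dict = {value: index for index, value in enumerate(self.geo_level_list)}
    self.activity_dict = {value: index for index, value in enumerate(self.activity_list)}
    self.repayment_interval_dict = {value: index for index, value in enumerate(self.repayment_interval_list)}
    self.status_dic = {value: index for index, value in enumerate(self.status_list)}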
def start_proc(dir_seed):
    print(dir_seed[1])
    rows = ROWS
    cols = COLS
    num_plant_types = PlantType().num_plant_types
    depth = num_plant_types + 3  # +1 for 'earth' type, +1 for water, +1 for health
    sector_rows = SECTOR_ROWS
    sector_cols = SECTOR_COLS
    prune_window_rows = PRUNE_WINDOW_ROWS
    prune_window_cols = PRUNE_WINDOW_COLS
    garden_step = STEP

    action_low = 0
    action_high = 1
    obs_low = 0
    obs_high = rows * cols

    garden_days = 100
    sector_obs_per_day = int(NUM_PLANTS + PERCENT_NON_PLANT_CENTERS * NUM_PLANTS)
    collection_time_steps = sector_obs_per_day * garden_days  # sectors observed per garden day * number of garden days
    water_threshold = 1.0

    data_collection = DataCollection()
    data_collection.evaluate_policy(
        data_collection.init_env(rows, cols, depth, sector_rows, sector_cols,
                                 prune_window_rows, prune_window_cols,
                                 action_low, action_high, obs_low, obs_high,
                                 collection_time_steps, garden_step,
                                 num_plant_types, dir_seed[0], dir_seed[1]),
        analytic_policy.policy, collection_time_steps, sector_rows, sector_cols,
        prune_window_rows, prune_window_cols, garden_step, water_threshold,
        sector_obs_per_day)
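# A minimal sketch of how start_proc might be driven over several (directory, seed)
# pairs with a process pool; the directory names, seed values, and the use of
# multiprocessing.Pool here are assumptions and not part of the original snippet.
if __name__ == '__main__':
    import multiprocessing

    dir_seeds = [('data/run_0', 0), ('data/run_1', 1), ('data/run_2', 2)]  # hypothetical paths and seeds
    with multiprocessing.Pool(processes=len(dir_seeds)) as pool:
        pool.map(start_proc, dir_seeds)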
def begin_collection(self):
    user = self.get_active_user()
    mode = self.get_active_mode()
    print('''You are ready to start a data collection session {0}.
\nIn this session you will be presented with {1} automatically generated "password(s)".
\nYour task is to simply type each password as it is presented. If you make a mistake, do not worry; just keep typing until you hit the correct key. Take your time and remember to concentrate!'''.format(user, Constants.SESSION_ITERATIONS))
    muse = self.start_stream()
    input('\nPress any key to begin...')
    datacollection = DataCollection(user, mode, Constants.SESSION_ITERATIONS, self.museID)
    datacollection.start()
    self.stop_stream(muse)
def feature_extraction_pipeline(exp_names):
    """Implements a segmentation-preprocessing-feature_extraction pipeline."""
    segments = []
    for exp_name in exp_names:
        data_collection = DataCollection(exp_name)
        data_collection.load()
        segments.extend(data_collection.segment())
    print("Loaded segments")

    preprocessed_segments = preprocess.preprocess_segments(segments)
    print("Preprocessed segments")

    features = feature_extraction.extract_features(preprocessed_segments)
    print("Extracted features")

    labels = np.array([segment.label for segment in segments])
    return features, labels
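# A minimal usage sketch, assuming hypothetical experiment names and that the
# returned features/labels are plain arrays suitable for scikit-learn; the
# RandomForestClassifier choice is an illustration, not the project's model.
if __name__ == '__main__':
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split

    features, labels = feature_extraction_pipeline(['exp_01', 'exp_02'])  # hypothetical experiment names
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=0)
    clf = RandomForestClassifier(n_estimators=100, random_state=0)
    clf.fit(X_train, y_train)
    print("Held-out accuracy:", clf.score(X_test, y_test))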
def main(arguments):
    # print('__main__')
    options = Options.from_arguments(arguments)
    if options.collecting_data:
        data_collection = DataCollection(options)
        data_collection.collect_data()
    elif options.display_data:
        display(options)
    elif options.test:
        print('running tests...')
        suite = unittest.TestSuite()
        for t in [test.split('.')[0] for test in glob.glob('test_*.py')]:
            try:
                # If the module defines a suite() function, call it to get the suite.
                mod = __import__(t, globals(), locals(), ['suite'])
                suitefn = getattr(mod, 'suite')
                suite.addTest(suitefn())
            except (ImportError, AttributeError):
                # Otherwise, just load all the test cases from the module.
                suite.addTest(unittest.defaultTestLoader.loadTestsFromName(t))
        unittest.TextTestRunner().run(suite)
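# A minimal sketch of an entry point for main(); forwarding sys.argv[1:] is an
# assumption, since the snippet does not show how Options.from_arguments expects
# to receive the command-line arguments.
if __name__ == '__main__':
    import sys
    main(sys.argv[1:])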
from data_collection import DataCollection
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as mpl

if __name__ == '__main__':
    usingDATA = DataCollection("E2", "G421", False)

    # Data analysis (frequency counts)
    COUNT = Counter(usingDATA)
    word_count = list()
    words = list()
    for p, q in COUNT.most_common(21):
        dic = {'word': p, 'count': q}
        word_count.append(dic)
        words.append(dic['word'])

    length = 1160
    NumOfCount = 1
    print("=" * 40)
    for k in word_count:
        try:
            print(
                f"{NumOfCount}. {k['word']:>6} | {k['count']:<3} | {k['count'] * 100 / length:0.2f}%"
            )
            NumOfCount = NumOfCount + 1
        except TypeError:
            continue
    print("=" * 40)
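# The WordCloud and matplotlib imports above are unused in this snippet. A minimal
# sketch of rendering the same frequency counts as a word cloud; the size and
# background_color arguments are assumptions, and Korean text would additionally
# need a font_path pointing at a font that supports Hangul.
cloud = WordCloud(width=800, height=600, background_color='white')
cloud = cloud.generate_from_frequencies(dict(COUNT.most_common(21)))
mpl.figure(figsize=(8, 6))
mpl.imshow(cloud, interpolation='bilinear')
mpl.axis('off')
mpl.show()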
"""Code to be run on RPi while collecting data. Usage: python3 rpi_collect.py host_ip port """ import sys from data_collection import DataCollection import sensor_data from clientconnect import recv_data # from dummy_data import recv_data if __name__ == '__main__': data_collection = DataCollection(experiment_dir=sys.argv[1]) try: for unpacked_data in recv_data(): data_collection.process(unpacked_data[:12]) except KeyboardInterrupt: # Use second argument as label for data data_collection.label = [int(sys.argv[2])] data_collection.save()
from data_collection import DataCollection
import sys
import matplotlib.pyplot as plt
import intrinio

from data_analysis import DataAnalysis

# Intrinio credentials (redacted)
USERNAME = '******'
PASSWORD = '******'

data_collection = DataCollection(username=USERNAME, password=PASSWORD)

ticker_symbol = "AAPL"
dates, prices = data_collection.retrieve_data(ticker_symbol)

da = DataAnalysis()
# data = da.get_yearly_trends(dates, prices, [3])
bounds, sell_dates, profits = da.get_optimal_bounds(dates, prices,
                                                    low_bound_min=0.3,
                                                    top_bound_max=0.5,
                                                    interval=0.01,
                                                    investment=100,
                                                    purchase_strategy="immediate")
da.plot_data(dates, prices, ticker_symbol, sell_dates=sell_dates)

# if sys.argv[1]:
#     ticker = sys.argv[1]
#     start_date = sys.argv[2]
#     results = data_collection.get_prices(ticker, start_date)
#     data_collection.plot_data(ticker)
#     data_collection.save_data(ticker)
from data_collection import DataCollection

DataCollection()
def __execute_data_collection(self, filename_output, dataHandle,
                              document_input_list, debug_message, document_type):
    collection_success = False
    error_document = None
    has_error = False
    try:
        collection_attempts = 0
        while collection_success is False and collection_attempts < self.max_attempts:
            prefix_str = "(Re)" if collection_attempts > 0 else " "
            a_message = "{}{}".format(prefix_str, debug_message)
            print("\n")
            print(a_message, '\tDate and time: ', datetime.now(), flush=True)

            proxy_info = self.__get_proxy()
            instaloaderInstance = localinstaloader.Instaloader(proxies=proxy_info)
            if document_type == "posts_hashtag":
                instaloaderInstance.login(user=self.instagram_user,
                                          passwd=self.instagram_passwd)

            dataCollection = DataCollection(
                filename_output=filename_output,
                dataHandle=dataHandle,
                instaloaderInstance=instaloaderInstance,
                instaloaderClass=localinstaloader,
                document_type=document_type)

            if proxy_info is None:
                print("\t!!!WARNING!!!: This collection is not using a proxy.")
            else:
                proxy_alias = proxy_info["https"].split("@")[1]
                print("\tUsing proxy:", proxy_alias)

            documents_collected = 0
            for document_input in document_input_list:
                documents_collected += 1
                if document_type == "profiles_posts":
                    print("\tCollecting profile of user {}".format(document_input),
                          '\tDate and time: ', datetime.now(), flush=True)
                    has_error, error_document = dataCollection.collectProfile(
                        username=document_input)
                elif document_type == "posts_profile":
                    print("\tCollecting posts of user {} {}/{}".format(
                        document_input["nome_do_usuario"], documents_collected,
                        len(document_input_list)),
                          '\tDate and time: ', datetime.now(), "\n", flush=True)
                    has_error, error_document = dataCollection.collectPosts(
                        data_min=self.min_date,
                        data_max=self.max_date,
                        post_limit=self.max_posts,
                        username=document_input['nome_do_usuario'],
                        hashtag=None)
                elif document_type == "posts_hashtag":
                    print("\tCollecting posts for hashtag {}".format(document_input),
                          '\tDate and time: ', datetime.now(), "\n", flush=True)
                    has_error, error_document = dataCollection.collectPosts(
                        data_min=self.min_date,
                        data_max=self.max_date,
                        post_limit=self.max_posts,
                        username=None,
                        hashtag=document_input)
                elif document_type == "media":
                    print("\tCollecting media of post {} {}/{}".format(
                        document_input['identificador'], documents_collected,
                        len(document_input_list)),
                          '\tDate and time: ', datetime.now(), flush=True)
                    has_error, error_document = dataCollection.downloadPostMedia(
                        post_id=document_input['identificador'],
                        media_url=document_input['identificador_midia'])
                elif document_type == "comments_profile" or document_type == "comments_hashtag":
                    print("\tCollecting comments of post {} {}/{}".format(
                        document_input['identificador'], documents_collected,
                        len(document_input_list)),
                          '\tDate and time: ', datetime.now(), flush=True)
                    has_error, error_document = dataCollection.collectComments(
                        post_id=document_input['identificador'],
                        comments_by_post_limit=self.max_comments,
                        line_debug_number=1000)
                elif document_type == "profiles_comments":
                    print("\tCollecting profile of user {} {}/{}".format(
                        document_input['nome_do_usuario'], documents_collected,
                        len(document_input_list)),
                          '\tDate and time: ', datetime.now(), flush=True)
                    has_error, error_document = dataCollection.collectProfile(
                        username=document_input['nome_do_usuario'])
                else:
                    print("Collection type not recognized. Terminating script...")
                    sys.exit(1)

                if has_error is True:
                    if "429" in error_document:
                        print("Too many requests made recently. Error:", error_document)
                        collection_attempts += 1
                        collection_success = False
                        break
                else:
                    collection_success = True
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print("\nThe collection process will be terminated due to an error. The error: ", e,
              '\tDetails: ', exc_type, fname, exc_tb.tb_lineno,
              '\tDate and time: ', datetime.now(), flush=True)
        error_document = self.__getErrorDocument(exception_obj=e,
                                                 exc_type=exc_type,
                                                 exc_tb=exc_tb)
        self.__create_error_file(
            filename_output=self.filename_unified_data_file,
            error_document=error_document)
        print("Terminating script.")
        sys.exit(1)
    finally:
        if has_error is True:
            print("{}{}".format(
                "\nThe collection process will be terminated due to an error. The error: ",
                error_document), flush=True)
            self.__create_error_file(
                filename_output=self.filename_unified_data_file,
                error_document=error_document)
            sys.exit(1)
def main():
    simulateCamera = False
    simulateSensors = False
    simulateActuators = False

    ard = Arduino()

    global vi
    if simulateCamera:
        vi = VisionInterfaceDummy()
    else:
        vi = VisionInterface()

    if simulateSensors:
        si = SensorInterfaceDummy(ard)
    else:
        si = SensorInterface(ard)

    global ctl
    if simulateActuators:
        ctl = ControlDummy(ard)
    else:
        ctl = Control(ard)

    dc = DataCollection(vi, si)

    # actions
    action_fw = FollowWallAction(ctl)
    action_hb = HuntBallAction(ctl)
    action_cb = CaptureBallAction(ctl)
    action_eb = EmergencyReverseAction(ctl)
    action_rp = RotateInPlaceAction(ctl)
    actionLookup = {
        "ACTION_FOLLOW_WALL": action_fw,
        "ACTION_HUNT_BALL": action_hb,
        "ACTION_CAPTURE_BALL": action_cb,
        "ACTION_EMERGENCY_REVERSE": action_eb,
        "ACTION_ROTATE_IN_PLACE": action_rp,
    }

    # goals
    goal_ex = ExploreGoal()
    goal_gb = GetBallGoal()
    goalLookup = {
        "GOAL_EXPLORE": goal_ex,
        "GOAL_GET_BALL": goal_gb,
    }

    tLast = time.time()
    tAvg = 0

    if not (simulateSensors and simulateActuators):
        ard.run()

    currentGoal = goal_ex

    signal.signal(signal.SIGALRM, alarm_handler)
    signal.alarm(180)

    ctl.ballCaptureOn()

    while True:
        data = dc.get()
        if log:
            print("Data: ")
            print(data)
            print("Goal: " + currentGoal.getName())
        (nextGoalName, actionName, actionArgs) = currentGoal.step(data)
        currentGoal = goalLookup[nextGoalName]
        if log:
            print("Action: " + actionName)
        action = actionLookup[actionName]
        if actionArgs is None:
            action.step()
        else:
            action.step(actionArgs)

        # Keep a running average of the loop time to report FPS
        tCurr = time.time()
        tDiff = tCurr - tLast
        tLast = tCurr
        tAvg = 0.9 * tAvg + 0.1 * tDiff
        print(str(1 / tAvg) + " FPS")
        print("")