def main():
    # Get the application data
    f = open(".access_token", "r")
    keydata = json.loads(f.read())

    # Initiate the moves object
    m = Moves(keydata)

    # Initiate the com link with arduino
    c = Comm()

    loops = 0

    # Run program loop
    while True:
        state = 0

        # Load date interval
        currentDate = datetime.datetime.now().strftime("%Y%m%d")
        oldDate = (datetime.datetime.now() - datetime.timedelta(days=30)).strftime("%Y%m%d")

        data = m.getRangeSummary(oldDate, currentDate)
        processor = DataProcessor(data)

        msg = processor.getDuration()
        print(msg)
        c.send(msg)

        # Sleep program until next check
        time.sleep(30)
def get_graph(self, area=None, company=None):
    """ Queries tweets by given area/company filter. """
    # Get all tweets or filtered by area
    if area:
        data = self.client.get_tweets_by_area(area)
        suffix = area.upper()
    elif company:
        data = self.client.get_tweets_by_company(company)
        suffix = company.upper()
    else:
        data = self.client.get_all_tweets()
        suffix = 'GLOBAL'

    processor = DataProcessor(data)

    # Get the time series data
    time_series = processor.prepare_time_series()

    # Save all the graph info in a list we can access from the view template
    graph = [
        dict(
            data=[dict(x=time_series.index, y=time_series)],
            layout=dict(title='Tweet Frequency - ' + suffix),
            id='timeseries'
        )
    ]

    # Plotly needs the graph/pandas data encoded in compatible JSON format
    graph = json.dumps(graph, cls=plotly.utils.PlotlyJSONEncoder)

    return graph
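A minimal usage sketch, assuming get_graph() is called from a Flask view that hands the encoded JSON to a template for Plotly.js to render; TweetGraphService, the route, and dashboard.html are hypothetical stand-ins, not part of the original code.

# Hypothetical Flask route showing one way the Plotly-encoded JSON could be consumed.
from flask import Flask, render_template

app = Flask(__name__)
service = TweetGraphService()  # assumption: the class that defines get_graph()

@app.route('/dashboard')
def dashboard():
    # graph_json is already Plotly-compatible JSON; the template would call
    # JSON.parse(...) and Plotly.newPlot(...) on the client side.
    graph_json = service.get_graph(area='london')
    return render_template('dashboard.html', graph_json=graph_json)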
def compute_scores(self, estimator):
    dp = DataProcessor()
    already_processed = False
    previous_commit = None
    all_scores = []

    reports = dp.read_and_process_report_data(self.path_to_reports_data, self.project)
    # print(self.train_split_index_start, self.train_split_index_end)
    reports_to_process = reports[self.train_split_index_start:self.train_split_index_end]

    pool = pp.ProcessPool(10)  # don't have more than number of reports??
    self.cur_estimator = estimator
    all_scores = pool.map(self.get_report_score, reports_to_process)
    # pool.close()
    # pool.join()

    all_matrixes = [i[0] for i in all_scores]
    total_tried = sum([i[1] for i in all_scores])
    number_achieved = sum([i[2] for i in all_scores])

    print("finished pooling")
    print(all_scores)

    final_MAP_score = self.MAP(all_matrixes)
    final_MRR_score = self.MRR(all_matrixes)

    print(final_MAP_score, " final MAP score")
    print(final_MRR_score, " final MRR score")
    print(float(number_achieved) / float(total_tried), " final accuracy at k score")

    return final_MAP_score
def InferenceData(trainer):
    images = ImageIO.ReadImagesFromFolder("../data/monster/himax_processed/", '.jpg', 0)
    [x_live, y_live] = DataProcessor.ProcessInferenceData(images, 60, 108)
    live_set = Dataset(x_live, y_live)

    params = {'batch_size': 1, 'shuffle': False, 'num_workers': 0}
    live_generator = data.DataLoader(live_set, **params)
    y_pred_himax = trainer.Infer(live_generator)
    y_pred_himax = np.reshape(y_pred_himax, (-1, 4))
    h_images = images

    images = ImageIO.ReadImagesFromFolder("../data/monster/bebop_processed/", '.jpg', 0)
    [x_live, y_live] = DataProcessor.ProcessInferenceData(images, 60, 108)
    live_set = Dataset(x_live, y_live)

    params = {'batch_size': 1, 'shuffle': False, 'num_workers': 0}
    live_generator = data.DataLoader(live_set, **params)
    y_pred_bebop = trainer.Infer(live_generator)
    y_pred_bebop = np.reshape(y_pred_bebop, (-1, 4))

    combinedImages = []
    for i in range(len(images)):
        img = ImageEffects.ConcatImages(images[i], h_images[i])
        combinedImages.append(img)

    VizDroneBEV(combinedImages, y_pred_bebop, y_pred_himax)
def LoadData(args):
    if args.gray is not None:
        [x_train, x_validation, y_train, y_validation] = DataProcessor.ProcessTrainData(
            args.load_trainset, 60, 108, True)
        [x_test, y_test] = DataProcessor.ProcessTestData(args.load_testset, 60, 108, True)
    else:
        [x_train, x_validation, y_train, y_validation] = DataProcessor.ProcessTrainData(
            args.load_trainset, 60, 108)
        [x_test, y_test] = DataProcessor.ProcessTestData(args.load_testset, 60, 108)

    training_set = Dataset(x_train, y_train, True)
    validation_set = Dataset(x_validation, y_validation)
    test_set = Dataset(x_test, y_test)

    # Parameters
    num_workers = 6
    params = {'batch_size': args.batch_size, 'shuffle': True, 'num_workers': num_workers}
    train_loader = data.DataLoader(training_set, **params)
    validation_loader = data.DataLoader(validation_set, **params)

    params = {'batch_size': args.batch_size, 'shuffle': False, 'num_workers': num_workers}
    test_loader = data.DataLoader(test_set, **params)

    return train_loader, validation_loader, test_loader
def test_reading_in():
    dp = DataProcessor()
    dp.get_stackoverflow_data("/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/birt/")
    dp.get_stackoverflow_data("/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/eclipse/")
    dp.get_stackoverflow_data("/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/eclipse-jdt/")
    dp.get_stackoverflow_data("/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/swt/")
def LoadData(args):
    [x_train, x_validation, y_train, y_validation] = DataProcessor.ProcessTrainData(args.load_trainset)
    [x_test, y_test] = DataProcessor.ProcessTestData(args.load_testset)

    training_set = Dataset(x_train, y_train, True)
    validation_set = Dataset(x_validation, y_validation)
    test_set = Dataset(x_test, y_test)

    # Parameters
    # num_workers - 0 for debug in Mac+PyCharm, 6 for everything else
    num_workers = 0
    params = {'batch_size': args.batch_size, 'shuffle': True, 'num_workers': num_workers}
    train_loader = data.DataLoader(training_set, **params)
    validation_loader = data.DataLoader(validation_set, **params)

    params = {'batch_size': args.batch_size, 'shuffle': False, 'num_workers': num_workers}
    test_loader = data.DataLoader(test_set, **params)

    return train_loader, validation_loader, test_loader
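A minimal sketch of how the returned loaders might be consumed, assuming Dataset yields (input, label) tensor pairs; model, optimizer, and criterion below are placeholders passed in by the caller, not names from the original code.

# Hypothetical train/validate loop over the loaders returned by LoadData().
import torch

def run_one_epoch(model, train_loader, validation_loader, optimizer, criterion, device="cpu"):
    model.train()
    for x_batch, y_batch in train_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        loss = criterion(model(x_batch), y_batch)
        loss.backward()
        optimizer.step()

    # Average validation loss without gradient tracking.
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for x_batch, y_batch in validation_loader:
            val_loss += criterion(model(x_batch.to(device)), y_batch.to(device)).item()
    return val_loss / max(len(validation_loader), 1)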
class StockAnalysisServer:

    def __init__(self):
        self.data_reader = DataReader()
        self.data_processor = DataProcessor()

    def get_expected_stock_return_probablity(self, ticker_name, time_horizon,
                                             base_amount, finalAmount,
                                             historic_data_years=1):
        '''
        This method computes the probability of the expected return
        :param ticker_name: The ticker name the user wishes to analyse
        :param time_horizon: The time horizon the user wishes to invest for
        :param base_amount: The base amount the user wishes to start investing with
        :param finalAmount: The final amount the user wishes to achieve
        :param historic_data_years: The number of years of history used for the base value calculation
        :return: The probability of the user achieving the target amount
        '''
        from_date = datetime.date.today() - relativedelta(years=historic_data_years)
        ticker_data = self.data_reader.fetch_ticker_data(ticker_name, from_date)  # '1/1/2017'
        base_rate, volatility = self.data_processor.calc_base_rate_and_volatility_by_ticker(ticker_data)
        probablity_expected_return = self.data_processor.calc_expected_return_probablity_based_on_monte_carlo(
            base_rate, volatility, time_horizon, base_amount, finalAmount)
        return probablity_expected_return
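For context, a self-contained sketch of the kind of Monte Carlo estimate the processor call above suggests, assuming base_rate and volatility are the per-period mean and standard deviation of log returns; this illustrates the general technique only, it is not the project's actual implementation, and the figures are made up.

# Illustrative Monte Carlo estimate of P(final value >= target).
import numpy as np

def monte_carlo_target_probability(base_rate, volatility, time_horizon,
                                   base_amount, target_amount,
                                   n_paths=10_000, seed=0):
    rng = np.random.default_rng(seed)
    # Simulate per-period log returns and compound them over the horizon.
    log_returns = rng.normal(base_rate, volatility, size=(n_paths, time_horizon))
    final_values = base_amount * np.exp(log_returns.sum(axis=1))
    # Fraction of simulated paths that reach the target amount.
    return float((final_values >= target_amount).mean())

# Example: probability of turning 1000 into 1500 over 5 periods.
print(monte_carlo_target_probability(0.05, 0.2, 5, 1000, 1500))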
def process_files_eclipse():
    dp = DataProcessor()
    path_to_reports_data = "/home/ndg/users/carmst16/EmbeddingBugs/resources/bugreport/Eclipse_Platform_UI.xlsx"
    path_to_starter_repo = "/home/ndg/users/carmst16/EmbeddingBugs/resources/source_files/test/eclipse.platform.ui/"
    path_to_processed_repo = "/home/ndg/users/carmst16/EmbeddingBugs/resources/source_files/test/eclipse.platform.ui_processed_split/"
    path_to_temp = "/home/ndg/users/carmst16/EmbeddingBugs/resources/source_files/test/eclipse.platform.ui_temp/"
    reports = dp.read_and_process_report_data(path_to_reports_data, "eclipse_platform_ui")
    dp.process_all_files(path_to_starter_repo, reports, path_to_processed_repo, path_to_temp)
def process_files_birt():
    dp = DataProcessor()
    path_to_reports_data = "/home/ndg/users/carmst16/EmbeddingBugs/resources/bugreport/Birt.xlsx"
    path_to_starter_repo = "/home/ndg/users/carmst16/EmbeddingBugs/resources/source_files/test/birt/"
    path_to_processed_repo = "/home/ndg/users/carmst16/EmbeddingBugs/resources/source_files/test/birt_processed_split/"
    path_to_temp = "/home/ndg/users/carmst16/EmbeddingBugs/resources/source_files/test/birt_temp/"
    reports = dp.read_and_process_report_data(path_to_reports_data, "birt")
    dp.process_all_files(path_to_starter_repo, reports, path_to_processed_repo, path_to_temp)
def process_files_swt():
    dp = DataProcessor()
    path_to_reports_data = "/home/ndg/users/carmst16/EmbeddingBugs/resources/bugreport/SWT.xlsx"
    path_to_starter_repo = "/home/ndg/users/carmst16/EmbeddingBugs/resources/source_files/test/eclipse.platform.swt/"
    path_to_processed_repo = "/home/ndg/users/carmst16/EmbeddingBugs/resources/source_files/test/eclipse.platform.swt_processed_split_text_trial/"
    path_to_temp = "/home/ndg/users/carmst16/EmbeddingBugs/resources/source_files/test/eclipse.platform.swt_temp_again/"
    reports = dp.read_and_process_report_data(path_to_reports_data, "swt")
    dp.process_all_files(path_to_starter_repo, reports, path_to_processed_repo, path_to_temp)
def __init__(self, model_path=None):
    self.config = DataConfig()
    self.dp = DataProcessor(self.config)
    self.num_channels = self.config.num_channels
    self.row = self.config.img_height
    self.col = self.config.img_width
    self.ch = self.config.num_channels
    self.model = self.load_model(model_path)
def __init__(self, path, part_size, stream_id):
    self.__part_size = part_size
    dp = DataProcessor()
    fl = FileLoader()
    data = fl.load_file(path)
    self.__size = len(data)
    self.__chunks = dp.split_data(data, part_size)
    debug('FileStreamer.init(): len(self.__chunks) == %d' % len(self.__chunks))
    self.__stream_id = stream_id
class Scheduler(object):
    """Schedules tweets by appending them to the tweet schedule CSV."""

    def __init__(self):
        self.schedule = DataProcessor(
            "C:/Users/JakeT/OneDrive/documents/visual studio 2017/Projects/Pytter/Pytter/database/tweet_schedule.csv"
        )

    def schedule_tweet(self, tweet):
        # print(self.schedule, "schedule before")
        self.schedule.add_row(tweet.to_list())
        # print(self.schedule, "schedule after")
def main():
    # Get the application data
    f = open('.access_token', 'r')
    keydata = json.loads(f.read())

    # Initiate the moves object
    m = Moves(keydata)

    # Initiate the com link with arduino
    c = Comm()

    loops = 0

    # Run program loop
    while True:
        state = 0

        if loops == 0:
            # Load date interval
            currentDate = datetime.datetime.now().strftime('%Y%m%d')
            oldDate = (datetime.datetime.now() - datetime.timedelta(days=30)).strftime('%Y%m%d')

            data = m.getRangeSummary(oldDate, currentDate)
            processor = DataProcessor(data)
            raw = processor.newDataProcessor()

        if processor.checkMoving():
            state = 1

        # Check realtime
        realtime = datetime.datetime.strptime(
            requests.get('http://studier.albinhubsch.se/lucy-light').text,
            "%Y-%m-%d %H:%M:%S")
        now = datetime.datetime.now()

        if realtime + datetime.timedelta(minutes=10) > now:
            state = 1

        msg = str(state) + ',' + raw
        c.send(msg)

        if loops < 10:
            loops += 1
        else:
            loops = 0

        # Sleep program until next check
        time.sleep(6)
def delta_learning_rule(data, a, lr, co):
    count = 0
    npa = np.array(a)
    co_copy = deepcopy(co)
    dp.plot_graph(npa, co_copy)

    # Prepend the bias input to every sample.
    for i in range(len(data)):
        data['input'][i].insert(0, 1)

    while count < 4:
        for row in data.itertuples():
            # Delta rule: move the weights by the error times the input.
            y = step_function(npa.dot(np.array([row[1]]).T))
            npa = npa + lr * (row[2] - y) * np.array(row[1])
            dp.plot_graph(npa, co_copy)
        count += 1
def spl(data, a, lr, co):
    count = 0
    npa = np.array(a)
    co_copy = deepcopy(co)
    dp.plot_graph(npa, co_copy)

    # Prepend the bias input to every sample.
    for i in range(len(data)):
        data['input'][i].insert(0, 1)

    while count < 4:
        for row in data.itertuples():
            gx = npa.dot(np.array([row[1]]).T)
            # Single-sample perceptron rule: update only on misclassified samples.
            if row[2] > 0 and gx[0] < 0 or row[2] < 0 and gx[0] > 0:
                npa = npa + lr * row[2] * np.array(row[1])
            dp.plot_graph(npa, co_copy)
        count += 1
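Both functions above apply variants of the same weight-update idea. A self-contained comparison with toy data (the samples, labels, and learning rate below are illustrative only, not from the original project):

# Toy comparison of the delta rule (w += lr * (t - y) * x) and the
# single-sample perceptron rule (update only when sign(w.x) disagrees with t).
import numpy as np

X = np.array([[1, 0.5, 1.0], [1, 2.0, 1.5], [1, -1.0, -0.5], [1, -2.0, -1.0]])  # bias prepended
t = np.array([1, 1, -1, -1])
lr = 0.1

w_delta = np.zeros(3)
w_perceptron = np.zeros(3)

for epoch in range(4):
    for x, target in zip(X, t):
        # Delta rule: weights move by the thresholded error times the input.
        y = 1 if w_delta @ x >= 0 else -1
        w_delta = w_delta + lr * (target - y) * x

        # Perceptron rule: correct only misclassified samples.
        if target * (w_perceptron @ x) <= 0:
            w_perceptron = w_perceptron + lr * target * x

print("delta rule weights:     ", w_delta)
print("perceptron rule weights:", w_perceptron)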
class Profile(object):
    """TODO: ADD CLASS DESCRIPTION"""

    def __init__(self):
        self.processor = DataProcessor(file_path=USERPATH)

    def authenticate(self):
        consumer_key = list(self.processor.get_column_np("consumer_key"))[0]
        consumer_secret = list(self.processor.get_column_np("consumer_secret"))[0]
        access_token = list(self.processor.get_column_np("access_token"))[0]
        access_token_secret = list(self.processor.get_column_np("access_token_secret"))[0]
        return twitter.OAuth(access_token, access_token_secret, consumer_key, consumer_secret)
def __init__(self, filename, embedding_method='deepwalk', **kwargs):
    self.dp = DataProcessor(filename)
    self.workers = cpu_count()
    self.embedding_model = None
    self.embedding_method = embedding_method
    print("Init over.")
    sys.stdout.flush()

    if embedding_method == 'deepwalk':
        self.deepwalk(**kwargs)
    elif embedding_method == 'grarep':
        self.grarep(**kwargs)
    elif embedding_method == "node2vec":
        self.node2vec(**kwargs)
    else:
        raise TypeError("Unsupported type %s" % embedding_method)
def getDataFromDB():
    """
    Fetches the data from the local InfluxDB server containing the patients' measurements.
    """
    ipDB = os.getenv('INFLUX_IP_AI', 'localhost')
    portDB = os.getenv('INFLUX_PORT_AI', '8086')
    userDB = os.getenv('INFLUX_USER_AI', 'admin')
    passwordDB = os.getenv('INFLUX_PW_AI', 'G10m1R0m3')
    nameDB = os.getenv('INFLUX_DB_AI', 'giomi')

    dr = DataRetriever(metrics)
    dfs = dr.loadDataFromDB(ipDB, portDB, userDB, passwordDB, nameDB)

    dp = DataProcessor(metrics, defaults)
    df = dp.applyPipeline(dfs)
    return df
def main():
    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s - %(levelname)s - %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S",
                        filename="log.txt",
                        filemode='w')
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    model = FrontNet(PreActBlock, [1, 1, 1])
    ModelManager.Read('Models/FrontNetNicky.pt', model)

    DATA_PATH = "/Users/usi/PycharmProjects/data/"
    [x_test, y_test, z_test] = DataProcessor.ProcessTestData(DATA_PATH + "TestNicky.pickle", 60, 108)
    # x_test = x_test
    # y_test = y_test

    test_set = Dataset(x_test, y_test)
    params = {'batch_size': 1, 'shuffle': False, 'num_workers': 0}
    test_generator = data.DataLoader(test_set, **params)

    trainer = ModelTrainer(model)
    valid_loss_x, valid_loss_y, valid_loss_z, valid_loss_phi, outputs, gt_labels = trainer.ValidateSingleEpoch(
        test_generator)

    VizDroneBEV(x_test, y_test, z_test, outputs)
def main():
    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s - %(levelname)s - %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S",
                        filename="log.txt",
                        filemode='w')
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    DATA_PATH = "/Users/usi/PycharmProjects/data/160x90/"
    name = "160x90HimaxMixedTrain_12_03_20.pickle"
    [x_test, y_test, z_test] = DataProcessor.ProcessTestData(DATA_PATH + name, True)

    h = x_test.shape[2]
    w = x_test.shape[3]
    x_test = np.reshape(x_test, (-1, h, w))

    # str.find() returns -1 when the substring is missing, so compare explicitly.
    if name.find(".pickle") != -1:
        name = name.replace(".pickle", '')

    VizWorldTopView(x_test, y_test, z_test, True, name)
def main():
    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s - %(levelname)s - %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S",
                        filename="log.txt",
                        filemode='w')
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    DATA_PATH = "/Users/usi/PycharmProjects/data/"
    [x_test, y_test] = DataProcessor.ProcessTestData(DATA_PATH + "patterns.pickle", 60, 108, True)
    # x_test = x_test[:500]
    # y_test = y_test[:500]

    test_set = Dataset(x_test, y_test)
    params = {'batch_size': 1, 'shuffle': False, 'num_workers': 0}
    test_generator = data.DataLoader(test_set, **params)

    Viz4PoseVariables(x_test, y_test)
class MyListener(StreamListener):

    def __init__(self):
        self.dp = DataProcessor()
        self.conf = Config()
        self.auth = tweepy.OAuthHandler(self.conf.consumer_key, self.conf.consumer_secret)
        self.auth.set_access_token(self.conf.access_token, self.conf.access_token_secret)
        self.conn = Connector()

    def on_data(self, raw_data):
        try:
            self.conn.insert_raw_twitter(raw_data, self.conn.twitterdb_demo_pub)
            target_info = self.dp.get_geo_twi_target_info(raw_data)
            # Insert the extracted result only when target info was found.
            if target_info:
                self.conn.insert_raw_twitter_result(target_info, self.conn.twitterdb_demo_results_pub)
            return True
        except BaseException as e:
            print("Error on_data:%s" % str(e))
            return True

    def on_error(self, status_code):
        if status_code == 420:
            print("ERROR: Rate limit reached")
        print(status_code)
        return True

    def on_timeout(self):
        print("ERROR: Timeout...")
        return True  # Don't kill the stream
def train(self):
    # This describes everything you want to search over
    parameters = {'size': [100, 500],
                  'window': [5, 10],
                  'sg': [1],
                  'workers': [16],
                  'hs': [0],
                  'negative': [25],
                  'iter': [1]}

    dp = DataProcessor()
    data = dp.get_stackoverflow_data_sentences_all(
        ["/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/swt/",
         "/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/birt/",
         "/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/eclipse/",
         "/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/eclipse-jdt/"])
    # if self.document:
    #     data = dp.get_stackoverflow_data_document(self.path_to_stackoverflow_data)
    # else:
    #     data = dp.get_stackoverflow_data_sentences(self.path_to_stackoverflow_data)

    w2v = W2VTransformer()
    # see: https://stackoverflow.com/questions/44636370/scikit-learn-gridsearchcv-without-cross-validation-unsupervised-learning/44682305#44682305
    # clf = GridSearchCV(w2v, parameters, scoring={"MPP": self.call_MRR, "MAP": self.call_MAP}, verbose=2, n_jobs=3, refit="MAP", cv=[(slice(None), slice(None))])
    # current implementation version only uses MAP to score
    # cv=[(slice(None), slice(None))]
    # clf = GridSearchCV(w2v, parameters, scoring=self.compute_scores, verbose=2)

    cur_max = 0
    best_model = None
    parameters["size"] = [100]
    parameters["window"] = [10]

    for s in parameters["size"]:
        for w in parameters["window"]:
            print(len(data))
            print("training model")
            model = gensim.models.Word2Vec(sentences=data, sg=1, size=s, window=w,
                                           workers=16, hs=0, negative=25, iter=5)
            print("model trained")
            print(parameters)
            score = self.compute_scores(model)
            if score > cur_max:
                cur_max = score
                best_model = model
            print(cur_max)

    word_vectors = best_model.wv
    print("VOCAB_SIZE", len(model.wv.vocab))
    word_vectors.save("best_model")
def __init__(self):
    ops.reset_default_graph()
    self.sess = tf.InteractiveSession()
    self.dp = DataProcessor(DataConfig())
    self.config = self.dp.config
    self.row = self.config.img_height
    self.col = self.config.img_width
    self.ch = self.config.num_channels
    self.batch_count = 0

    self.create_nvidia_model()
    self.create_train_method()

    self.epoch_count = 0
    self.step_count = 0
    self.loss_val = 1
    self.saver = tf.train.Saver()

    if self.config.model_continue:
        self.restore_sess()
    else:
        self.sess.run(tf.global_variables_initializer())
def get_model_coverage(self):
    parameters = {'size': [100, 500],
                  'window': [5, 10],
                  'sg': [1],
                  'workers': [16],
                  'hs': [0],
                  'negative': [25],
                  'iter': [1]}

    dp = DataProcessor()
    data = dp.get_stackoverflow_data_sentences_all(
        ["/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/swt/",
         "/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/birt/",
         "/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/eclipse/",
         "/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/eclipse-jdt/"])

    model = gensim.models.Word2Vec(sentences=data, sg=1, size=100, window=10,
                                   workers=16, hs=0, negative=25, iter=1)
    vocab = model.wv.vocab
    print("VOCAB_SIZE", len(vocab))

    reports = dp.read_and_process_report_data(self.path_to_reports_data, self.project)

    all_report_text = []
    all_source_file_text = []
    for report in reports:
        report_text = report.processed_description
        file_path = self.path_to_processed_repo + str(report.reportID) + "/"
        all_report_text.extend(report_text)

        for dir_, _, files in os.walk(file_path):
            for fileName in files:
                relDir = os.path.relpath(dir_, file_path)
                relFile = os.path.join(relDir, fileName)
                full_path = file_path + relFile
                with open(full_path, 'r') as content_file:
                    content = content_file.readlines()
                    for line in content:
                        l = line.strip().split(",")
                        all_source_file_text.extend(l)

    all_report_vocab = set(all_report_text)
    all_source_file_vocab = set(all_source_file_text)

    print("report coverage", len(set.intersection(all_report_vocab, vocab)) / float(len(all_report_vocab)))
    print("source file coverage", len(set.intersection(all_source_file_vocab, vocab)) / float(len(all_source_file_vocab)))
def Selector1():
    selector = Selector()
    dataProcessor = DataProcessor()
    # abil = selector.getAbilityWithId(3)

    passed = selector.runSelector()
    print(passed)

    selector.setCharacterStats(15, 15)
    passed = selector.runSelector()
    print(passed)
    print(selector.HP)
    print(selector.getAuraWithId(selector.getAbilityWithId(2)['auraOne']))
def main():
    reader = DataReader('dataSrc')
    data = reader.readCoordinates()
    processor = DataProcessor(data)
    locations = processor.processDataPoints()

    try:
        for location in locations:
            location.state.country.addNew()
            location.state.country_id = location.state.country.id
            # location.state.country = None
            location.state.addNew()
            location.state_id = location.state.id
            # location.state = None
            location.addNew()
    except Exception as e:
        print(e)

    print(Location.listAll())
def main():
    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s - %(levelname)s - %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S",
                        filename="log.txt",
                        filemode='w')
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    DATA_PATH = "/Users/usi/PycharmProjects/data/160x90/"

    # Get baseline results
    picklename = "160x90HimaxMixedTest_12_03_20.pickle"
    [x_test, y_test] = DataProcessor.ProcessTestData(DATA_PATH + picklename)
    test_set = Dataset(x_test, y_test)
    params = {'batch_size': 1, 'shuffle': False, 'num_workers': 1}
    test_generator = data.DataLoader(test_set, **params)

    model = Dronet(PreActBlock, [1, 1, 1], True)
    ModelManager.Read('../PyTorch/Models/DronetHimax160x90AugCrop.pt', model)
    trainer = ModelTrainer(model)
    MSE2, MAE2, r2_score2, outputs2, gt_labels2 = trainer.Test(test_generator)

    # Get pitch values
    picklename = "160x90HimaxMixedTest_12_03_20Rot.pickle"
    r_test = DataProcessor.GetRollFromTestData(DATA_PATH + picklename)
    print(r_test)

    # str.find() returns -1 when the substring is missing, so compare explicitly.
    if picklename.find(".pickle") != -1:
        picklename = picklename.replace(".pickle", '')

    Plot2Models(r_test, picklename, r2_score2)
def roberta_pair_task(config):
    tokenizer = BertTokenizer.from_pretrained(config.tokenizer_file, do_lower_case=config.do_lower_case)
    processor = DataProcessor(config)
    config.class_list = processor.get_labels()
    config.num_labels = len(config.class_list)

    train_examples = processor.get_train_examples()
    dev_examples = processor.get_dev_examples()
    augment_examples = processor.read_data_augment(config.data_augment_method)

    cur_model = MODEL_CLASSES[config.use_model]
    model = cur_model(config)

    logging.info("self config %s", config_to_json_string(config))

    model_example, dev_evaluate, predict_label = cross_validation(
        config=config,
        model=model,
        tokenizer=tokenizer,
        train_examples=train_examples,
        dev_examples=dev_examples,
        pattern=config.pattern,
        train_enhancement=augment_examples if config.data_augment else None,
        test_examples=None)

    logging.info("dev_evaluate: {}".format(dev_evaluate))

    if config.pattern == 'full_train':
        model_save(config, model_example)

    return dev_evaluate
def Test():
    model = Dronet(PreActBlock, [1, 1, 1], True)
    ModelManager.Read("Models/DronetHimax160x90.pt", model)
    trainer = ModelTrainer(model)
    # ModelManager.Read("Models/FrontNetGray.pt", model)

    [x_test, y_test] = DataProcessor.ProcessTestData(
        "/Users/usi/PycharmProjects/data/160x90HimaxStatic_12_03_20.pickle")
    test_set = Dataset(x_test, y_test)

    params = {'batch_size': 64, 'shuffle': False, 'num_workers': 1}
    test_loader = data.DataLoader(test_set, **params)

    trainer.Predict(test_loader)
def __init__(self):
    self.n_clusters = 2
    self.algorithms = {
        'current': STRPAlgorithm(self.n_clusters),
        'future': STRPAlgorithm(self.n_clusters)
    }
    self.data_processors = {
        'current': DataProcessor(),
        'future': DataProcessor()
    }

    self.max_absolute_treshold = 13
    self.min_absolute_treshold = 5
    self.max_percentual_treshold = .1
    self.min_percentual_treshold = .02
    self.entity_temper_percentual_threshold = .2

    self.is_running = False
    self.container = list()
    self.processed_nodes = list()
    self.raw_data = list()

    self.client = udp_client.UDPClient(OSC_SERVER, 8000)
    self.last_iteration = datetime.now()

    print('Application initialised')
    self.is_running = True

    # Create dummy data
    for i, d in enumerate(start_data):
        transformed_data = self.data_processors['current'].transform_input_data(d)
        self.processed_nodes.append(transformed_data)
def main():
    data_path = "../data/training-Obama-Romney-tweets.xlsx"
    test_data_path = ''
    # test_data_path = '../data/testing-Obama-Romney-tweets-3labels.xlsx'

    try:
        opts, args = getopt.getopt(sys.argv[1:], "d:t:")
        for o, a in opts:
            if o == '-d':
                data_path = a
            elif o == '-t':
                test_data_path = a
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))
        print('read the readme file to know how to run this project')
        sys.exit(2)

    dp = DataProcessor(data_path)
    tc = TweetClassifier()

    if test_data_path != '':
        dpt = DataProcessor(test_data_path)

        print('\n****** OBAMA ******\n')
        data = dp.load_excel_data('Obama')
        data_test = dpt.load_excel_data('Obama')
        report = tc.train_test(data, data_test)
        DataProcessor.print_report(report)

        print('\n****** ROMNEY ******\n')
        data = dp.load_excel_data('Romney')
        data_test = dpt.load_excel_data('Romney')
        report = tc.train_test(data, data_test)
        DataProcessor.print_report(report)
    else:
        print('\n****** OBAMA ******\n')
        data = dp.load_excel_data('Obama')
        report = tc.crossvalidate(data, 10)
        DataProcessor.print_report(report)

        print('\n****** ROMNEY ******\n')
        data = dp.load_excel_data('Romney')
        report = tc.crossvalidate(data, 10)
        DataProcessor.print_report(report)
data_file = "data/dataS.txt" para_file = "data/paraS.txt" result_file = "data/resultS.txt" if len(sys.argv) > 1: if sys.argv[1] == '-d': if len(sys.argv) < 4: logging.error("Not enough arguments !!") sys.exit() else: data_file = sys.argv[2] para_file = sys.argv[3] result_file = sys.argv[4] DP = DataProcessor(data_file, para_file, result_file, False) inputData = DP.loadInput() startTime = datetime.now() print 'Execution started at:', startTime algo = pyMSGSP(inputData["T"], inputData["MS"], inputData["SDC"], logging.INFO) # algo = BruteForceSPM(inputData["T"], inputData["MS"], inputData["SDC"]) outputData = algo.run() print 'Execution time:', datetime.now() - startTime outputDict = defaultdict(list) for seq in outputData: count = 0 for d in inputData["T"]: