def owner_main():
    # Assumes mysql.connector imported as myc, plus project modules for
    # authentication (a), data search (ds) and data writing (dw).
    mydb = myc.connect(host="", user="", passwd="", database="")
    mycur = mydb.cursor()
    print("Welcome to the owner module")
    user = input("Enter your username: ")
    passw = input("Enter your password: ")
    if a.auth_main(user, passw):
        print("Welcome back, dear owner\n"
              "Press 1 to check the logs of your current turfs\n"
              "Press 2 to add a new turf on your account\n"
              "Press 3 to exit")
        ch = int(input())
        if ch == 1:
            x = ds.data_main(user)
            print(x)
            print("Enter the name of the turf for which you want to fetch the data")
            name = input().lower()
            result = ds.data_main(name)
            for i in result:
                print(i)
        elif ch == 2:
            name = input("Enter the name with which you want to make the entry: ")
            dw.owner_writer(name, user)
        elif ch == 3:
            print("Thanks for using AM's Database Manager")
            exit(1)
    else:
        print("Authentication Failed")
def classify_to_csv(encoder_type, classifier_type, simple_classifier,
                    extended_classifier, test_tsv, output_file):
    """
    Takes classifiers for each grain of classification and creates a csv
    identical to the test tsv, but with predicted classifications and
    accuracies.
    """
    if encoder_type == "bow" or encoder_type == "lstm":
        simple = "/{0}_simple.pt".format(encoder_type)
        extended = "/{0}_extended.pt".format(encoder_type)
    elif classifier_type == "pooling":
        if args.word_embedding.startswith("fasttext"):
            simple = "/{0}_{1}_simple.pt".format(encoder_type, "fasttext")
            extended = "/{0}_{1}_extended.pt".format(encoder_type, "fasttext")
        else:
            simple = "/{0}_{1}_simple.pt".format(encoder_type, "glove")
            extended = "/{0}_{1}_extended.pt".format(encoder_type, "glove")

    simple_classifier.load_checkpoint(args.saved_models + simple)
    extended_classifier.load_checkpoint(args.saved_models + extended)

    dw = DataWriter(simple_classifier, extended_classifier, test_tsv, output_file)
    dw.write()
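For reference, a minimal sketch of how classify_to_csv might be invoked; the classifier class name and file paths below are placeholders for illustration, not taken from the original source.

# Hypothetical usage sketch; SimpleClassifier and the paths are placeholders,
# and `args` is assumed to be the module-level argparse namespace that
# classify_to_csv reads (args.saved_models, args.word_embedding).
simple_clf = SimpleClassifier()      # placeholder constructor
extended_clf = SimpleClassifier()    # placeholder constructor
classify_to_csv(encoder_type="bow",
                classifier_type="linear",
                simple_classifier=simple_clf,
                extended_classifier=extended_clf,
                test_tsv="data/test.tsv",
                output_file="output/test_predictions.csv")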
        # Fragment: paging loop inside get_cve_results().
        html = cve_getter.get_specific_search_results(ven, product, version, index)
        cve_info = cve_getter.get_cve_info(html)

        index += PAGE_ITEM_MAX  # We can only get 20 items in one page.
        item_count -= PAGE_ITEM_MAX

        for search_result in cve_info:
            data_to_write.append(search_result.name)
            data_to_write.append(search_result.desc)
            data_to_write.append(search_result.date)
            data_to_write.append(search_result.cvss)
            writer.write_excel(data_to_write, has_title)
            if has_title:
                has_title = False
            data_to_write.clear()
    else:
        logger.info('Product %s, version %s not found. Supported versions: %s',
                    product, version, versions)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='This is a tool to fetch CVE info from NVD.')
    parser.add_argument('product', help="The product name.")
    parser.add_argument('version', help="The product version.")
    args = parser.parse_args()
    cve_writer = DataWriter()
    get_cve_results(args.product, args.version, cve_writer)
    cve_writer.close()
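As a usage note, the argparse setup above takes two positional arguments (product and version), so a run might look like the following; the script filename and the product/version values are assumptions for illustration.

python fetch_cve.py openssl 1.0.2k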
print("... %s of %s : Downloading %s data " % (index, len(rows), company)) df_data = self.downloadStockData(market_type, code, year1, month1, date1, year2, month2, date2) if df_data is not None: df_data_indexed = df_data.reset_index() self.dbwriter.updatePriceToDB(code, df_data_indexed) index += 1 #return self.dbhandler.endTrans() print("Done!!!") if __name__ == "__main__": services.register('dbhandler', DataHandler()) services.register('dbwriter', DataWriter()) crawler = DataCrawler() #html_codes = crawler.downloadCode('2') #print(html_codes.__class__) #crawler.parseCodeHTML(html_codes, '2') #crawler.updateAllCodes() crawler.updateAllStockData(2, 2010, 1, 1, 2015, 12, 1, start_index=1)
def twitter_search(search_method, search_terms_file, search_terms_col,
                   filter_method, num_iterations, testing_mode, flush_db):
    print_args(
        [search_method, search_terms_file, search_terms_col,
         filter_method, num_iterations, testing_mode, flush_db],
        [i.strip() for i in
         ('search_method,'
          'search_terms_file, search_terms_col,'
          'filter_method, num_iterations,'
          'testing_mode, flush_db').split(',')])

    start_time = time.time()

    if testing_mode:
        send_email('Hi there!',
                   'Running script in testing mode at {}'.format(
                       datetime.datetime.now()),
                   ['*****@*****.**'])

    if flush_db:
        # Connect to database
        dw = DataWriter(MONGO_DB_HOST, MONGO_DB_PORT)
        dw.flush(MONGO_DB_NAME)
        print('Database flushed. Stopping script.')
        return

    search_terms = read_search_terms(search_terms_file, search_terms_col)
    if testing_mode:
        # Limit the number of terms
        print('TEST MODE - Limiting the number of terms to 10.')
        search_terms = search_terms[:10]

    found_ids = {term: set() for term in search_terms}
    # TODO: *idea*
    # Avoid adding duplicate tweets.
    # Have a collection of all tweets, and a lightweight collection,
    # e.g. [{"term": "btc", "query": "$btc", "id": 2837127372}, ...etc]
    since_ids = {}
    for i_iter in range(num_iterations):
        # Connect to database
        dw = DataWriter(MONGO_DB_HOST, MONGO_DB_PORT)
        len_0 = dw.get_collection_len(MONGO_DB_NAME, MONGO_DB_COLLECTION)
        for term in tqdm(search_terms):
            print('Term = {}'.format(term))

            # Authorize and load the twitter API
            api = load_api()

            if search_method == 'ticker':
                q = '${}'.format(term)
            elif search_method == 'hashtag':
                q = '#{}'.format(term)
            else:
                q = term

            if filter_method:
                if filter_method == 'crypto':
                    trash_filters = (
                        '-filter:retweets -filter:links '
                        '-gainers -losers -alert -alerts -changes '
                        '-change -changed -increased -decreased')
                else:
                    raise ValueError(
                        'Bad filter_method value: {}'.format(filter_method))
                q += ' {}'.format(trash_filters)

            print('Query = {}'.format(q))

            # Start searching for tweets from 1 day ago
            until_date = (DATE - datetime.timedelta(days=1)).strftime('%Y-%m-%d')

            # Search backwards until since_id is reached
            since_id = get_since_id(term)

            api, tweets, _found_ids, since_id = \
                tweet_search(api, term, q,
                             found_ids=found_ids[term],
                             since_id=since_id,
                             until_date=until_date,
                             local_filestore=JSON_FILE_PATH,
                             dw=dw,
                             testing_mode=testing_mode)
            found_ids[term] = _found_ids
            since_ids[term] = since_id

        print('Finished iteration = {}/{}'.format(i_iter + 1, num_iterations))

        # Get the number of new tweets found, and write it to a log file
        num_new_documents = dw.get_collection_len(
            MONGO_DB_NAME, MONGO_DB_COLLECTION) - len_0
        print('Saved {} new tweets'.format(num_new_documents))
        if not os.path.exists('num_iterations_report.log'):
            with open('num_iterations_report.log', 'w') as f:
                f.write('date,iteration,num_new_tweets_found\n')
        with open('num_iterations_report.log', 'a') as f:
            f.write('{},{},{}\n'.format(DATE_STR, i_iter, num_new_documents))

    dump_since_ids(since_ids)

    print('Done pulling data up to {} UTC'.format(DATE_STR))
    end_time = (time.time() - start_time) / 3600
    print('Runtime = {} hours'.format(end_time))
    with open(os.path.join(DIR_PATH, 'runtime.log'), 'a+') as f:
        f.write('{} - {} hours\n'.format(datetime.datetime.now(), end_time))
def tweet_search(api, term, query, found_ids, until_date='', since_id=0,
                 save_freq=1500, dw=None, local_filestore='',
                 num_iterations=3, testing_mode=False):
    """
    Search through recent tweets matching the query, starting from a previous
    ID if available, or from the oldest tweet exposed through the search API.
    """
    if not local_filestore:
        raise ValueError('Please specify local_filestore in tweet_search function')
    if not dw:
        # Connect to database
        dw = DataWriter(MONGO_DB_HOST, MONGO_DB_PORT)

    # Set the date to start the search (will search backwards from this point)
    if not until_date:
        until_date = (datetime.datetime.utcnow()
                      - datetime.timedelta(days=1)).strftime('%Y-%m-%d')

    if testing_mode:
        save_freq = 5

    errors = 0
    max_errors = 5
    attempt = 0
    max_attempts = 1
    searched_tweets = []
    _since_id = since_id
    next_since_id = None
    _max_id = None

    # Run the tweet search loop, starting at until_date and going back
    # until no more tweets are available OR since_id is reached.
    while True:
        try:
            # The Twitter API searches backwards, starting at the most recent
            # tweets above since_id and below max_id. We search from max_id
            # down to since_id, updating max_id after each iteration.
            if _max_id:
                new_tweets = api.search(q=query,
                                        count=100,
                                        since_id=str(_since_id),
                                        max_id=str(_max_id),
                                        result_type='recent',
                                        until=until_date,
                                        tweet_mode='extended')
            else:
                new_tweets = api.search(q=query,
                                        count=100,
                                        since_id=str(_since_id),
                                        result_type='recent',
                                        until=until_date,
                                        tweet_mode='extended')
            time.sleep(3)
            for t in new_tweets:
                print(t._json['created_at'], t._json['id'])
            print('Found {} tweets'.format(len(new_tweets)))
            if not new_tweets:
                raise ValueError('No new tweets found')

            new_tweets_json = [t._json for t in new_tweets]
            print('len(new_tweets_json) before ID filter', len(new_tweets_json))

            # Save the starting ID to define the end point of the next run
            if next_since_id is None:
                next_since_id = new_tweets_json[0]['id']

            # Update max ID to push back the search threshold
            _max_id = int(new_tweets_json[-1]['id']) - 1
            print('max_id', _max_id)

            # Get IDs
            new_ids = set([t['id'] for t in new_tweets_json])

            # Filter out IDs already found
            new_tweets_json = [t for t in new_tweets_json
                               if t['id'] not in found_ids]
            print('len(new_tweets_json) after ID filter', len(new_tweets_json))

            # Update IDs
            found_ids = found_ids.union(new_ids)

            # Add metadata to tweets
            new_tweets_json = [{
                'term': term,
                'q': query,
                'get_date': datetime.datetime.utcnow().strftime(TWITTER_DATE_FORMAT),
                'tweet': t,
            } for t in new_tweets_json]

            # Extend the list to be saved
            searched_tweets.extend(new_tweets_json)
            if len(searched_tweets) > save_freq:
                dw.write(searched_tweets, MONGO_DB_NAME, MONGO_DB_COLLECTION,
                         filename=os.path.join(local_filestore,
                                               term_to_filepath(term),
                                               '{}.json'.format(DATE_STR)))
                searched_tweets = []
                if testing_mode:
                    print('TEST MODE - Tweets saved, '
                          'returning from tweet_search function')
                    return api, searched_tweets, found_ids, next_since_id

        except tweepy.TweepError:
            print('Rate limit reached, waiting 15 minutes')
            print('(until: {})'.format(datetime.datetime.now()
                                       + datetime.timedelta(minutes=15)))
            # t0 = time.time()
            dw.write(searched_tweets, MONGO_DB_NAME, MONGO_DB_COLLECTION,
                     filename=os.path.join(local_filestore,
                                           term_to_filepath(term),
                                           '{}.json'.format(DATE_STR)))
            searched_tweets = []
            # time.sleep((15 * 60) - (time.time() - t0))
            time.sleep(15 * 60)
            continue

        except ValueError as e:
            if 'No new tweets found' in str(e):
                attempt += 1
                if attempt > max_attempts:
                    print('No tweets found, stopping search')
                    break
                else:
                    print('No tweets found, trying {} more time(s)'
                          .format(max_attempts - attempt + 1))
                    print('Re-loading the twitter API')
                    api = load_api()
                    print('Waiting for a few seconds ...')
                    time.sleep(3)
                    continue
            else:
                action, errors = log_errors(e, errors, max_errors, api, term,
                                            query, until_date, since_id,
                                            save_freq, local_filestore,
                                            num_iterations, testing_mode)
                if action == 'continue':
                    continue
                elif action == 'break':
                    break

        except Exception as e:
            action, errors = log_errors(e, errors, max_errors, api, term,
                                        query, until_date, since_id,
                                        save_freq, local_filestore,
                                        num_iterations, testing_mode)
            if action == 'continue':
                continue
            elif action == 'break':
                break

    dw.write(searched_tweets, MONGO_DB_NAME, MONGO_DB_COLLECTION,
             filename=os.path.join(local_filestore,
                                   term_to_filepath(term),
                                   '{}.json'.format(DATE_STR)))

    return api, searched_tweets, found_ids, next_since_id
def __init__(self, client_id, server_host, server_port, mc_address,
             mc_port, send_interval=5, chunk_size=10000000,
             file_size=10000000 * 2, run_time=30, test_path='./',
             dd_method=False):
    """
    :param client_id: string, unique id for the client
    :param server_host: string, ip address/hostname for the server
    :param server_port: int, port the server will listen on
    :param mc_address: string, multicast group address to publish the heartbeat
    :param mc_port: int, port for the multicast group
    :param send_interval: int, heartbeat send interval
    :param chunk_size: int, size in bytes of the data written to a file at a time
    :param file_size: int, maximum file size for the writer
    :param run_time: int, how long the client should run
    :param test_path: string, path to write the data files
    :param dd_method: bool, whether to use dd for the file writing
    """
    self.client_id = client_id
    self.server_host = server_host
    self.chunk_size = chunk_size
    self.file_size = file_size
    self.run_time = run_time
    self.mc_group = mc_address, mc_port
    self.send_interval = send_interval
    self.test_path = test_path
    # If dd_method is True, dd will be used;
    # if dd_method is False, a Python file object will be used.
    self.dd_method = dd_method

    logging.basicConfig(filename=client_id + '.log',
                        format='%(asctime)s %(levelname)s: %(message)s',
                        level=logging.INFO)

    # Unsure of the requirements, so assume the worst case:
    # check that the chunk size and max file size allow at least
    # two chunks to be written to the file.
    try:
        assert self.file_size / self.chunk_size >= 2
    except AssertionError:
        print("Client chunk size is too small for max file size. "
              "Please reconfigure")
        exit(1)

    # Create the initial TCP connection
    self.tcp = TCPClient((server_host, server_port))

    self.hb1 = Heartbeat(self.mc_group[0], self.mc_group[1],
                         self.client_id, self.send_interval)
    self.kill_sig = Queue(1)
    self.hb_process = Process(target=self.hb1.run, args=(self.kill_sig,))
    self.hb_process.daemon = True

    self.queue1 = Queue()
    dw1 = DataWriter(self.chunk_size, self.file_size, self.dd_method,
                     self.test_path)
    self.dw_process = Process(target=dw1.run,
                              args=(self.queue1, self.kill_sig))
    self.dw_process.daemon = True
    self.dw_process_pid = None
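To make the wiring above easier to follow, here is a minimal sketch of how a run() method on this client might drive the two background processes; the method name, the use of os.urandom as a payload, and the shutdown signalling via kill_sig are assumptions, not taken from the original source.

import os
import time

def run(self):
    # Hypothetical driver: start the heartbeat and writer processes,
    # feed data chunks for run_time seconds, then signal shutdown.
    self.hb_process.start()
    self.dw_process.start()
    self.dw_process_pid = self.dw_process.pid

    deadline = time.time() + self.run_time
    while time.time() < deadline:
        self.queue1.put(os.urandom(self.chunk_size))  # illustrative payload

    self.kill_sig.put(True)  # assumed stop signal read by both processes
    self.hb_process.join()
    self.dw_process.join()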
                                           dim=1).detach().cpu().numpy())
    ]
else:
    dev_preds = [
        id2label[x] for x in (torch.argmax(nnmodel(dev_data),
                                           dim=1).detach().numpy())
    ]

dataloader = Dataloader()
dev_spans = dataloader.read_spans(
    file_name="./datasets/dev-task-TC-template.out")
dev_spans["gold_label"] = dev_preds
datawriter = Datawriter()
datawriter.pred_writer(dev_spans, "./predictions/dev_preds.txt")

if gpu:
    test_preds = [
        id2label[x] for x in (torch.argmax(nnmodel(test_data.cuda()),
                                           dim=1).detach().cpu().numpy())
    ]
else:
    test_preds = [
        id2label[x] for x in (torch.argmax(nnmodel(test_data),
                                           dim=1).detach().numpy())
    ]
dataloader = Dataloader()
"device_id", # the device we want to locate "system_id", # TANGO has system id 7585, and Pozyx has 115200 (also used for socket port and baudrate) "anchor_id", # anchor used to take the current measure "px", # [px, py, pz] is the position of the device in world coordinate "py", "pz", "theta_x", # [theta_x, theta_y, theta_z] corresponds to the orientation of the device (radian) "theta_y", "theta_z", "distance", # distance between anchor and device "rssi", # received signal strength ] # Initialize the datawriter which will log the received measures in a csv file datawriter = DataWriter(file_path, header=data_fields, verbose=True, verbose_interval=1) # ----------- initialize the threads ------------------- threads_list = [] if use_pozyx: threads_list.append( PozyxAcquisition(usb_port=usb_port, datawriter=datawriter)) if use_tango: threads_list.append( TangoAcquisition(local_ip=local_ip, datawriter=datawriter)) # start the threads try: # Start threads for thread in threads_list:
config = Config(os.getcwd() + '/config.json')

client = None
if config.console.type == 'serial':
    # The serial console to acquire the data source stream
    client = SerialConsole(config.console)
    client.login(user=config.console.login, password=config.console.password)
elif config.console.type == 'ssh':
    client = SshConsole(config.console)

ver = '0.0.0.0'
if client is None:
    data_reader = DataReaderStub()
else:
    ver = read_fw_version(client)
    # The parsing and aggregation of data into Stats
    data_reader = DataReader(client)

data_writer = DataWriter(os.getcwd())  # Output Stats data to file

figure = plt.figure('Mem, CPU over time - ' + ver)
du = DataUpdater(data_reader, data_writer, figure)

# The interval timer is restarted after each call to __call__ returns,
# so the total time for each iteration is interval + data processing time (sleeps).
anim = FuncAnimation(figure, du, init_func=du.init,
                     interval=config.interval * 1000, blit=True)
plt.show()
# Splitting into separate groups
# Starting from 1; the 0 position is an empty row
most_played = groups[0].select("tr.app")[1:]
# Starting from 0 - the first row
trending = groups[1].select("tr.app")
# Starting from 0 - the first row
popular = groups[2].select("tr.app")
# Starting from 0 - the first row
hot_releases = groups[3].select("tr.app")

# Scraping --------------------------------------------------------------------
data_scraper = DataScraper()
data_writer = DataWriter()

# Processing Most Played
most_played_data = []
for game_row in most_played:
    most_played_data.append(data_scraper.get_most_played_data(game_row))

data_writer.write_to_csv(most_played_data, "most_played")
# data_writer.write_to_json(most_played_data, "most_played")

# Processing Trending
trending_data = []
for game in trending:
    trending_data.append(data_scraper.get_trending_data(game))