def CLI(parser, callback, log):
    # standard arguments for logging
    parser.add_argument("--silent", help="Turn off logging",
                        action='store_true', default=False)
    parser.add_argument("--log-file",
                        help="Output log file (default: standard output)",
                        default=None)
    args = parser.parse_args()

    # set up logging
    log.silent = args.silent
    if args.log_file:
        log.stream = open(args.log_file, 'w')

    start = datetime.now()
    log.info("Started at: %s\n" % str(start))

    # run script
    callback(args)

    end = datetime.now()
    time_taken = format_seconds((end - start).seconds)
    log.info("Completed at: %s\n" % str(end))
    log.info("- Total time: %s\n" % str(time_taken))

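# Aside: every snippet in this collection calls a format_seconds() utility that
# renders a duration as a short human-readable string. The real helpers differ
# per project (some take a float number of seconds, one is handed a timedelta);
# the function below is only a hypothetical sketch of the idea, not any of the
# original implementations.
def format_seconds_sketch(seconds):
    seconds = int(seconds)
    hours, remainder = divmod(seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return "%dh %dm %ds" % (hours, minutes, seconds)

# e.g. format_seconds_sketch(3725) == "1h 2m 5s"
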
def cmd_uptime(self, args, tojid, typ="chat"):
    """ Calculates the uptime, formats it as a human-readable string and echoes it. """
    uptime = datetime.datetime.today() - self.birthday
    # python >= 2.7: http://docs.python.org/library/datetime.html#datetime.timedelta.total_seconds
    # uptimestr = utils.format_seconds(uptime.total_seconds())
    # for compatibility with python < 2.7
    uptimestr = utils.format_seconds(
        (uptime.microseconds + (uptime.seconds + uptime.days * 24 * 3600) * 10 ** 6) / 10 ** 6)
    self.send(tojid, uptimestr, typ)

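# Aside (not part of the bot's module): the manual expression above mirrors the
# equivalence given in the timedelta.total_seconds() documentation for Python
# versions before 2.7; a quick self-contained check of that equivalence:
from datetime import timedelta

def total_seconds_compat(td):
    # pre-2.7 equivalent of td.total_seconds(), with true division
    return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 10 ** 6

td = timedelta(days=1, hours=2, seconds=3)
assert total_seconds_compat(td) == td.total_seconds()
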
def eval_bleu(self):
    self.logger.info('Evaluate dev BLEU')
    start = time.time()
    self.model.eval()
    avg_bleus = []
    dump_dir = self.args.dump_dir
    with torch.no_grad():
        for pair in self.pairs:
            self.logger.info('--> {}'.format(pair))
            src_lang, tgt_lang = pair.split('2')
            src_lang_idx = self.data_manager.lang_vocab[src_lang]
            tgt_lang_idx = self.data_manager.lang_vocab[tgt_lang]
            logit_mask = self.data_manager.logit_masks[tgt_lang]

            data = self.data_manager.translate_data[pair]
            src_batches = data['src_batches']
            sorted_idxs = data['sorted_idxs']
            ref_file = data['ref_file']

            all_best_trans, all_beam_trans = self._translate(
                src_batches, sorted_idxs, src_lang_idx, tgt_lang_idx, logit_mask)

            all_best_trans = ''.join(all_best_trans)
            best_trans_file = join(dump_dir, '{}_val_trans.txt.bpe'.format(pair))
            open(best_trans_file, 'w').close()
            with open(best_trans_file, 'w') as fout:
                fout.write(all_best_trans)

            all_beam_trans = ''.join(all_beam_trans)
            beam_trans_file = join(dump_dir, '{}_beam_trans.txt.bpe'.format(pair))
            open(beam_trans_file, 'w').close()
            with open(beam_trans_file, 'w') as fout:
                fout.write(all_beam_trans)

            # merge BPE
            nobpe_best_trans_file = join(dump_dir, '{}_val_trans.txt'.format(pair))
            ut.remove_bpe(best_trans_file, nobpe_best_trans_file)
            nobpe_beam_trans_file = join(dump_dir, '{}_beam_trans.txt'.format(pair))
            ut.remove_bpe(beam_trans_file, nobpe_beam_trans_file)

            # calculate BLEU
            bleu, msg = ut.calc_bleu(self.args.bleu_script, nobpe_best_trans_file, ref_file)
            self.logger.info(msg)
            avg_bleus.append(bleu)
            self.stats[pair]['dev_bleus'].append(bleu)

            # save translation with BLEU score for future reference
            trans_file = '{}-{}'.format(nobpe_best_trans_file, bleu)
            shutil.copyfile(nobpe_best_trans_file, trans_file)
            beam_file = '{}-{}'.format(nobpe_beam_trans_file, bleu)
            shutil.copyfile(nobpe_beam_trans_file, beam_file)

    avg_bleu = sum(avg_bleus) / len(avg_bleus)
    self.stats['avg_bleus'].append(avg_bleu)
    self.logger.info('avg_bleu = {}'.format(avg_bleu))
    self.logger.info('Done evaluating dev BLEU, it takes {} seconds'.format(
        ut.format_seconds(time.time() - start)))

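# Aside on the "merge BPE" step above: ut.remove_bpe is not shown here. A
# typical implementation (this is only a hedged sketch, not the project's
# actual code) strips the "@@ " subword continuation markers so that BLEU is
# computed on detokenized-at-the-subword-level text.
def remove_bpe_sketch(in_path, out_path, marker='@@ '):
    with open(in_path) as fin, open(out_path, 'w') as fout:
        for line in fin:
            fout.write(line.replace(marker, ''))
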
def trim_short_tweets(cutoff=20):
    """
    utility function for deleting short tweets from our database.
    cutoff is a character-length threshold; hashes shorter than this are deleted.
    """
    load_time = time.time()
    db = lite.connect(TWEET_DB_PATH)
    cursor = db.cursor()
    cursor.execute("SELECT hash FROM tweets")
    hashes = cursor.fetchall()
    hashes = set([str(h) for (h,) in hashes])
    print('extracted %i hashes in %s' %
          (len(hashes), utils.format_seconds(time.time() - load_time)))
    short_hashes = [h for h in hashes if len(h) < cutoff]
    print("found %i of %i hashes below %i character cutoff" %
          (len(short_hashes), len(hashes), cutoff))
    load_time = time.time()
    hashvals = ["'%s'" % h for h in short_hashes]
    db.execute("DELETE FROM tweets WHERE hash IN (%s)" % ",".join(hashvals))
    # self.cache.executemany("DELETE FROM tweets WHERE hash=(?)", iter(short_hashes))
    db.commit()
    print('deleted %i hashes in %s' %
          (len(short_hashes), utils.format_seconds(time.time() - load_time)))

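# Aside on the DELETE above: interpolating quoted values into the SQL string
# works, but the commented-out executemany line hints at the parameterized
# form. A self-contained sketch of that alternative (the in-memory table and
# data here are made up for illustration; sqlite3's DB-API expects one-element
# tuples as parameters):
import sqlite3

db = sqlite3.connect(':memory:')
db.execute("CREATE TABLE tweets (hash TEXT)")
db.executemany("INSERT INTO tweets VALUES (?)",
               [('short',), ('a-much-longer-hash-value',)])
short_hashes = ['short']
db.executemany("DELETE FROM tweets WHERE hash = ?", [(h,) for h in short_hashes])
db.commit()
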
def train(self):
    env = self.env
    agent = self.agent
    args = self.args
    # pdb.set_trace()
    t = tqdm.tqdm()
    while not env.done():
        self.step += 1
        state = env.state()
        action = agent.get_action(state)
        _, log = env.step(action)
        self.print("> Environment step logging:\tstep %d\taction: %r\t%s" %
                   (self.step, env.wrap_action(action), log_str(log)))
        # pdb.set_trace()
        if self.step % args.rl_search_reward_interval == 0:
            reward, student_metric, log = env.reward()
            self.print(
                "> Environment reward logging:\tstep %d\treward: %.4f, student metric: %.4f\t%s" %
                (self.step, reward, student_metric, log_str(log)))
            agent.update_reward(reward)
            if self.step % args.rl_search_learn_interval == 0:
                agent.learn()
                agent.reset()
                assert self.step % args.rl_search_save_interval == 0
                self.save(student_metric)
        elif self.step % args.rl_search_save_interval == 0:
            student_metric, log = env.validate()
            self.print(
                "> Environment validate logging:\tstep %d\tstudent metric: %.4f\t%s" %
                (self.step, student_metric, log_str(log)))
            self.save(student_metric)
        t.update(1)
        self.clear_print_buffer()
    print("Avg time: %s" % format_seconds(t.avg_time))
    print("Total: %d steps" % t.n)
    print("Total time: %s" % format_seconds(t.avg_time * t.n))
    t.close()

def archive_old_tweets(cutoff=0.2):
    """cutoff represents the rough fraction of tweets to be archived"""
    load_time = time.time()
    db = lite.connect(TWEET_DB_PATH)
    cursor = db.cursor()
    cursor.execute("SELECT id FROM tweets")
    ids = cursor.fetchall()
    ids = [str(h) for (h,) in ids]
    print('extracted %i ids in %s' %
          (len(ids), utils.format_seconds(time.time() - load_time)))
    ids = sorted(ids)
    tocull = int(len(ids) * cutoff)
    ids = ids[:tocull]
    print('found %i old tweets' % len(ids))
    load_time = time.time()
    ids = ["'%s'" % i for i in ids]
    # todo we actually want to archive this stuff tho
    cursor.execute("SELECT * FROM tweets WHERE id IN (%s)" % ",".join(ids))
    results = cursor.fetchall()
    db.execute("DELETE FROM tweets WHERE id IN (%s)" % ",".join(ids))
    db.commit()
    filename = "data/culled_%s.p" % time.strftime("%b%d%H%M")
    pickle.dump(results, open(filename, 'wb'))
    print('archived %i hashes in %s' %
          (len(ids), utils.format_seconds(time.time() - load_time)))

def get_beacon_ratio(ts):
    """
    Calculate ratio of records that were beaconing to all records.
    Detect beaconing by checking for consistent inter-arrival times.

    Assumptions:
    - beaconing is at least 50-80% of the overall activity
    - beacons don't span multiple rows over multiple seconds
    """
    ts = ts.sort_values().drop_duplicates()
    timedelta = (ts - ts.shift(1))
    ts_beacon = ts[timedelta.between(timedelta.median() - timedelta.std(),
                                     timedelta.median() + timedelta.std())]
    ratio = len(ts_beacon) / float(len(ts))
    return pd.DataFrame([{'ratio': ratio,
                          'timedelta_s': timedelta.median().total_seconds(),
                          'timedelta_f': format_seconds(timedelta.median().total_seconds())}])

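# Aside: a self-contained illustration of the beacon filter used above, on a
# made-up timestamp series (the timestamps and the 0.6 result are invented for
# this sketch). Records whose inter-arrival time falls within one standard
# deviation of the median inter-arrival time are counted as beacon-like.
import pandas as pd

ts = pd.Series(pd.to_datetime([
    "2021-01-01 00:00:00", "2021-01-01 00:00:30", "2021-01-01 00:01:00",
    "2021-01-01 00:01:30", "2021-01-01 00:07:12",  # last event is not beacon-like
]))
deltas = ts.sort_values().drop_duplicates().diff()
beacon_like = ts[deltas.between(deltas.median() - deltas.std(),
                                deltas.median() + deltas.std())]
print(len(beacon_like) / float(len(ts)))  # ratio of beacon-like records, here 0.6
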
def update_console(self):
    """ prints various bits of status information to the console. """
    # what all do we want to have, here? let's blueprint:
    # tweets seen: $IN_HAS_TEXT passed filter: $PASSED_F% Hits: $HITS
    seen_percent = int(100 * (float(self.stream_handler.passed_filter) /
                              self.stream_handler.tweets_seen))
    runtime = time.time() - self.stats.start_time
    status = (
        'tweets seen: ' + str(self.stream_handler.tweets_seen) +
        " passed filter: " + str(self.stream_handler.passed_filter) +
        " ({0}%)".format(seen_percent) +
        " hits " + str(self.stats.possible_hits) +
        " agrams: " + str(self.stats.hits) +
        " buffer: " + str(self.stream_handler.bufferlength()) +
        " runtime: " + utils.format_seconds(runtime)
    )
    sys.stdout.write(status + '\r')
    sys.stdout.flush()

def action_get_usage(self, **kwargs):
    endpoint = '/user/usage'
    response = self._make_request(endpoint)

    # base_bandwidth = response['baseBandwidth']
    # offer_bonus_earned = response['offerBonusEarned']
    total_limit = response['totalLimit']
    percent_used = response['percentUsed'] * 100
    balance_remaining = response['balanceRemaining']
    used = total_limit - balance_remaining  # bytes consumed so far

    msg = 'Data plan usage information:\n\n'
    msg += ('You have used %d MB (%.1f%%) out of your plan of %d MB.\n\n' %
            (utils.b_to_mb(used), percent_used, utils.b_to_mb(total_limit)))

    end_time = response['endTime'] / 1000
    time_remaining = end_time - time.time()
    msg += 'Time until quota reset: '
    msg += utils.format_seconds(time_remaining)

    return msg

def process_results(self, store):
    for user, edits in store.query.rawresults.user_edits.items():
        newedits = []
        for edit in edits:
            edit = [s.decode("utf8") if isinstance(s, bytes) else s for s in edit]
            t = ''
            if store.namespaces[edit[0]]:
                t = store.namespaces[edit[0]] + ":"
            newedits.append((t + edit[1], self.format_date(edit[2])))
        newedits.sort(key=lambda x: x[1].timestamp)
        store.query.rawresults.user_edits[user] = newedits

    common_pages = set()
    if store.allusers:
        allpages = []
        for user in store.users:
            allpages.append({p[0] for p in store.query.rawresults.user_edits[user]})
        common_pages = set.intersection(*allpages)
    else:
        for user in store.users:
            unionset = set()
            for user2 in store.users:
                if user == user2:
                    continue
                unionset.update(edit[0] for edit in store.query.rawresults.user_edits[user2])
            common_pages.update(
                {edit[0] for edit in store.query.rawresults.user_edits[user]} & unionset)

    # Find minimum time between edits by different users on the same page:
    # make dict {page: [edit, edit, edit...]} and get the smallest
    # diff between 2 timestamps by different users
    page_scores = {}
    page_hist = {}
    for page in common_pages:
        page_edits = []
        diffs = []
        for user, edits in store.query.rawresults.user_edits.items():
            page_edits.extend((user, e[1]) for e in edits if e[0] == page)
        page_edits.sort(key=lambda x: x[1])
        page_hist[page] = page_edits
        page_edits = self.snip_between(page_edits, key=lambda ed: ed[0])
        for i in range(len(page_edits) - 1, 0, -1):
            if page_edits[i][0] != page_edits[i - 1][0]:
                # I wish I could use the diff function I wrote :(
                diffs.append(page_edits[i][1] - page_edits[i - 1][1])
        if diffs:
            smallest = min(diffs)
            fuzziness = timedelta(days=365)
            if smallest < fuzziness or 1 == 1:  # TODO: make this a GET param
                page_scores[page] = smallest

    table = OrderedDict()
    for page in sorted(page_scores, key=page_scores.get):
        table[page] = []
        table[page].append(page)  # title stuff - index 0
        duration = format_seconds(page_scores[page])
        timeline_url = '/sigma/timeline.py'
        fragment = {"page": page, "users": tuple(store.users), "server": store.server}
        if store.startdate:
            fragment['startdate'] = store.startdate
        if store.enddate:
            fragment['enddate'] = store.enddate
        timeline_url += "?" + urlencode(fragment, doseq=True)
        table[page].append([duration, timeline_url])  # min time between edits - index 1
        for user in store.users:
            p_hist = page_hist[page]
            user_is_first = p_hist[0][0] == user
            editcount = len([e for e in p_hist if e[0] == user])
            table[page].append((user_is_first, editcount))  # edit count and blue-ness for the user
    store.table = table

def parse_log_file(self):
    self.runtime = None
    self.sent = None
    self.received = None
    self.speed = None

    logger.debug('Parsing log file for snapshot. %r' % {
        'volume_id': self.volume.id,
        'snapshot_id': self.id,
    })

    try:
        with open(self.log_path) as log:
            start_time = None
            line = log.readline()

            try:
                line_split = shlex.split(line)
                if len(line_split) >= 2:
                    # Get epoch time
                    epoch = line_split[0] + 'T' + line_split[1]
                    epoch = datetime.datetime.strptime(epoch, '%Y/%m/%dT%H:%M:%S')
                    start_time = int(time.mktime(epoch.timetuple()))
                else:
                    logger.warning('Failed to get snapshot start ' +
                        'time from log, line split length invalid. %r' % {
                            'volume_id': self.volume.id,
                            'snapshot_id': self.id,
                            'log_line': line,
                        })
            except ValueError:
                logger.warning('Failed to get snapshot start ' +
                    'time from log, value error. %r' % {
                        'volume_id': self.volume.id,
                        'snapshot_id': self.id,
                        'log_line': line,
                    })

            # Get last kilobyte of file
            log.seek(0, os.SEEK_END)
            file_size = log.tell()
            log.seek(max(file_size - 1024, 0))
            lines = log.readlines()

            # Find rsync sent command line output
            for line in lines:
                try:
                    line_split = shlex.split(line)
                except ValueError:
                    continue
                if len(line_split) < 10:
                    continue

                # Get rsync command
                command = line_split[3]

                if command == 'sent':
                    if start_time:
                        # Get runtime
                        epoch = line_split[0] + 'T' + line_split[1]
                        epoch = datetime.datetime.strptime(epoch, '%Y/%m/%dT%H:%M:%S')
                        epoch = int(time.mktime(epoch.timetuple()))
                        self.runtime = utils.format_seconds(epoch - start_time)

                    # Get snapshot info
                    try:
                        self.sent = utils.format_bytes(line_split[4])
                    except ValueError:
                        logger.warning('Failed to get sent bytes ' +
                            'from snapshot log, value error. %r' % {
                                'volume_id': self.volume.id,
                                'snapshot_id': self.id,
                                'log_line': line,
                            })

                    try:
                        self.received = utils.format_bytes(line_split[7])
                    except ValueError:
                        logger.warning('Failed to get received bytes ' +
                            'from snapshot log, value error. %r' % {
                                'volume_id': self.volume.id,
                                'snapshot_id': self.id,
                                'log_line': line,
                            })

                    try:
                        self.speed = utils.format_bytes(line_split[9]) + '/sec'
                    except ValueError:
                        logger.warning('Failed to get transfer speed ' +
                            'from snapshot log, value error. %r' % {
                                'volume_id': self.volume.id,
                                'snapshot_id': self.id,
                                'log_line': line,
                            })
    except IOError:
        logger.debug('Failed to read log file for ' +
            'snapshot, IOError. %r' % {
                'volume_id': self.volume.id,
                'snapshot_id': self.id,
            })

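# Aside: the indexes 3, 4, 7 and 9 above assume an rsync --log-file style
# summary line with a date/time/pid prefix. The sample line below is made up
# for illustration (not taken from a real snapshot log), but shows how the
# shlex.split() positions line up with "sent", sent bytes, received bytes and
# transfer rate.
import shlex

sample = "2021/01/02 03:04:05 [123] sent 1,234 bytes  received 5,678 bytes  910.00 bytes/sec"
parts = shlex.split(sample)
print(parts[3], parts[4], parts[7], parts[9])  # sent 1,234 5,678 910.00
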
def main(args):
    mini_str = '/mini' if args.mini else ''  # path to mini dataset
    version_suffix = '_v2.0' if args.squad_version == 2.0 else ''  # gets proper dataset version (1.1 or 2.0)

    # Prepare output directory under ./weights/ to store model-specific data including weights
    out_dir = 'weights/%s' % args.experiment
    if os.path.exists(out_dir):
        print('Warning - you are overwriting previous experiment %s. Hit Ctrl Z to abort.\n' % args.experiment)
        shutil.rmtree(out_dir)
    os.mkdir(out_dir)

    logger = open(os.path.join(out_dir, 'log.txt'), 'w')
    print_and_log('Timestamp = %s for %s\n' %
                  (datetime.strftime(datetime.now(), '%m/%d/%Y %H:%M'), args.experiment), logger)

    # Load Dev Data and save it to this model's weights dir
    print_and_log('Loading v%s Dev Data...' % args.squad_version, logger)
    dev_data = load_pk('preprocess/data%s/squad_dev_trees%s.npy' % (mini_str, version_suffix))
    dev_batcher = Batcher(dev_data, is_train=False, target_batch_size=args.batch_size)
    save_as_pk(dev_batcher, os.path.join(out_dir, 'dev_batcher.npy'))
    print_and_log('Loaded Dev Data...', logger)

    # Load Train Data and save it to this model's weights dir
    print_and_log('Loading v%s Train Data...' % args.squad_version, logger)
    train_data = load_pk('preprocess/data%s/squad_train_trees%s.npy' % (mini_str, version_suffix))
    train_batcher = Batcher(train_data, is_train=True, target_batch_size=args.batch_size)
    print_and_log('Loaded Train Data...', logger)

    # Create models and optimizers
    span_extractor = TreeLSTM(use_cuda=args.cuda)
    answer_verifier = AnswerVerifier(use_cuda=args.cuda)
    if args.cuda:
        span_extractor.cuda()
        answer_verifier.cuda()

    span_extractor_grad_params = filter(lambda p: p.requires_grad, span_extractor.parameters())
    span_extractor_optimizer = optim.Adam(span_extractor_grad_params, args.span_extractor_lr)

    answer_verifier_grad_params = filter(lambda p: p.requires_grad, answer_verifier.parameters())
    answer_verifier_optimizer = optim.Adam(answer_verifier_grad_params, args.answer_verifier_lr)

    # Determines if question is answerable or not
    answer_verifier_logistic_loss = BCEWithLogitsLoss(
        pos_weight=span_extractor.cudify(torch.FloatTensor([0.5])))

    best_span_f1 = -1  # Keep track of which epoch model achieves highest span level F1 on the dev set
    best_answer_verifier_accuracy = -1
    best_span_epoch = -1
    best_answer_verifier_epoch = -1

    for epoch_idx in range(args.epochs):
        print_and_log('Starting Epoch %d...' % (epoch_idx + 1), logger)

        # Stores predictions and returns evaluation string at the end of epoch
        train_evaluator = Evaluator('train')
        dev_evaluator = Evaluator('dev')

        start_time = time()
        span_extractor.train()
        answer_verifier.train()
        while train_batcher.has_next():
            # Clear gradients and get next batch
            span_extractor_optimizer.zero_grad()
            answer_verifier_optimizer.zero_grad()

            joint_loss = _run_batch(
                batch=train_batcher.next(),
                span_extractor=span_extractor,
                span_extractor_optimizer=span_extractor_optimizer,
                answer_verifier=answer_verifier,
                answer_verifier_optimizer=answer_verifier_optimizer,
                answer_verifier_logistic_loss=answer_verifier_logistic_loss,
                evaluator=train_evaluator)
            joint_loss.backward()

            # Make a gradient step
            span_extractor_optimizer.step()
            answer_verifier_optimizer.step()

        print_and_log('Took %s.' % format_seconds(time() - start_time), logger)
        print_and_log('\t' + train_evaluator.eval_string(), logger)

        span_extractor.eval()
        answer_verifier.eval()
        while dev_batcher.has_next():
            _run_batch(
                batch=dev_batcher.next(),
                span_extractor=span_extractor,
                span_extractor_optimizer=span_extractor_optimizer,
                answer_verifier=answer_verifier,
                answer_verifier_optimizer=answer_verifier_optimizer,
                answer_verifier_logistic_loss=answer_verifier_logistic_loss,
                evaluator=dev_evaluator)
        print_and_log('\t' + dev_evaluator.eval_string(), logger)

        dev_f1 = dev_evaluator.span_f1()
        if dev_f1 > best_span_f1:
            best_span_f1 = dev_f1
            best_span_epoch = epoch_idx + 1
            torch.save(span_extractor, os.path.join(out_dir, 'best_span_extractor.tar'))

        dev_answer_verifier_accuracy = dev_evaluator.avg_answer_accuracy()
        if dev_answer_verifier_accuracy > best_answer_verifier_accuracy:
            best_answer_verifier_accuracy = dev_answer_verifier_accuracy
            best_answer_verifier_epoch = epoch_idx + 1
            torch.save(answer_verifier, os.path.join(out_dir, 'best_answer_verifier.tar'))

    print_and_log('\nBest span = %.4f F1 at %d epoch' % (best_span_f1, best_span_epoch), logger)
    print_and_log('\nBest answer verifier = %.4f accuracy at %d epoch' %
                  (best_answer_verifier_accuracy, best_answer_verifier_epoch), logger)

hasrecord = (len(records) > 0)

if hasrecord:
    totalprice = float(bytesin + bytesout) / 1000 / 1000 / 1000 * price_per_gb
    starttime = records[-1].time
    starthash = records[-1].hashstr
    endtime = records[0].time
    endhash = records[0].hashstr
    log = add_log(title="BillCalc", params=[
        ("user", username),
        ("starttime", starttime.strftime(timefmt)),
        ("starthash", starthash),
        ("endtime", endtime.strftime(timefmt)),
        ("endhash", endhash),
        ("bytesin", bytesin),
        ("bytesout", bytesout),
        ("duration", duration),
        ("totalprice", totalprice),
    ])

if hasrecord:
    print("Usage for user [%s]:" % username)
    print(" From <%s> to <%s>" % (starttime.strftime("%Y-%m-%d %H:%M:%S"),
                                  endtime.strftime("%Y-%m-%d %H:%M:%S")))
    print(" Total upload: " + format_bytes(bytesin))
    print(" Total download: " + format_bytes(bytesout))
    print(" Total usage: " + format_seconds(duration))
    print(" Total price: " + str(totalprice))
    print(" Hash code: " + log.hashstr)
else:
    print("No usage for user [%s] since <%s>" %
          (username,
           datetime.datetime.strptime(starttime, timefmt).strftime("%Y-%m-%d %H:%M:%S")
           if starttime else "ever"))