def main():
    """Extract per-user location logs for every whitelisted user.

    argv: [1] input directory, [2] output CSV path, [3] file listing
    valid user ids (one per line).  Appends each user's extracted
    location rows to the single output file.
    """
    import sys
    import glob
    import time
    import os

    t0 = time.time()
    input_dir, output_path, users_path = sys.argv[1], sys.argv[2], sys.argv[3]
    # Whitelist of user ids; one id per line in the file.
    allowed = {ln.strip() for ln in open(users_path)}
    splitter = split_by_5_minute
    paths = glob.glob(os.path.join(input_dir, '*', '*.csv'))
    total = len(paths)
    with open(output_path, 'w') as sink:
        for pos, path in enumerate(paths, start=1):
            uid = get_uid(path)
            if uid not in allowed:
                continue
            # Large read buffer: these per-user CSVs are big.
            with open(path, buffering=(2 << 27)) as fh:
                records = prepare_user_log(fh)
                logs = extract_location(records, splitter)
                save_to_csv(sink, uid, logs)
            logging.info('[%d/%d]' % (pos, total))
    logging.info('finish with time %s', str(time.time() - t0))
def start_btsync(request):
    """Start the BitTorrent Sync process and redirect to the index page.

    No-op redirect when BTSync is already running.  Returns an error
    response when the application config lacks the BTSync config path.
    On first successful start, caches the server uid into config.json.
    """
    import time  # local import: only needed for the startup poll below
    global pid
    if is_btsync_active():
        return HttpResponseRedirect('/')
    # If wrong structure of config file, return an error
    if 'btsync_conf_file' not in config:
        return HttpResponse('Klucz btsync_conf_file nie istnieje w pliku konfiguracyjnym')
    # BUGFIX: the BTSync config file must exist *before* load_json()
    # reads it.  The original loaded first and only afterwards checked
    # path.isfile() / created an empty config, so a fresh install
    # crashed inside load_json and the create branch was unreachable.
    if not path.isfile(config['btsync_conf_file']):
        create_empty_btsync_config_file()
    btsync_conf = load_json(config['btsync_conf_file'])
    # If btsync-folder doesn't exist, create it
    if not os.path.exists(btsync_conf['storage_path']):
        os.makedirs(btsync_conf['storage_path'])
    # Start BTSync process
    if platform.system() == 'Windows':
        pass  # for the future
    elif platform.system() == 'Linux':
        pid = subprocess.Popen([config['btsync_exe_file'], '--config', config['btsync_conf_file']])
        # Poll until the BTSync web API answers; sleep briefly so the
        # wait does not spin a CPU core (original was a hot busy-wait).
        while not is_btsync_active():
            time.sleep(0.1)
    if 'uid' not in config:
        config['uid'] = get_uid(config['btsync_server_address'])
        save_json(os.path.join(config['application_path'], 'config.json'), config)
    return HttpResponseRedirect('/')
def main():
    """Compute top- and second-level domain statistics per user.

    argv: [1] input directory, [2] output path for top-domain counts,
    [3] output path for second-level-domain counts.
    """
    import sys
    import glob
    import time
    import os
    start = time.time()
    inputdir = sys.argv[1]
    topdomainfile = sys.argv[2]
    seconddomainfile = sys.argv[3]
    csvfiles = glob.glob(os.path.join(inputdir, '*', '*.csv'))
    n = len(csvfiles)
    # BUGFIX: context managers guarantee both output files are flushed
    # and closed even when a per-user file raises; the original used
    # bare open()/close() and leaked the handles on any exception.
    with open(topdomainfile, 'w') as topdomainobj, \
            open(seconddomainfile, 'w') as seconddomainobj:
        for idx, csvfile in enumerate(csvfiles):
            uid = get_uid(csvfile)
            # Large read buffer: these per-user CSVs are big.
            with open(csvfile, buffering=(2 << 27)) as f:
                topcounter, secondcounter = domain_statistic(f)
            save_to_csv(topdomainobj, uid, topcounter)
            save_to_csv(seconddomainobj, uid, secondcounter)
            logging.info('[%d/%d]' % (idx + 1, n))
    logging.info('finish with time %s', str(time.time() - start))
def get_team_all_players(self):
    """Collect the ids of all players belonging to the home or away club.

    Parses the player template XML at ``self.player_template`` and
    returns a set of integer player ids whose club id matches
    ``self.homeid`` or ``self.awayid``.  Returns an empty set when the
    template cannot be parsed.

    (Original Chinese docstring: add a dummy record for players not yet
    on the official roster.)
    """
    try:
        tree = ET.parse(self.player_template)
    except Exception as e:
        # BUGFIX: the original printed the error and then proceeded to
        # call tree.getroot() on None, raising AttributeError.  Report
        # and bail out with an empty set instead.
        print(e)
        return set()
    players = set()
    root = tree.getroot()
    # Layout observed in the template: root[0][0] holds player entries;
    # entry[0] is the player id, entry[4] the club id.
    for entry in root[0][0]:
        clubid = common.get_uid(int(entry[4].text), self.sportType)
        if clubid == self.homeid or clubid == self.awayid:
            # Parse the id once (the original parsed it twice, leaving
            # an unused local).
            players.add(int(entry[0].text))
    return players
def main():
    """Compute per-user app/category statistics, reading CSVs via redis.

    argv: [1] input dir, [2] output CSV, [3] valid-user list,
    [4] top-domain list, [5] second-level-domain list,
    [6] blacklist-domain list (first CSV column of each holds the value).
    """
    import sys
    import glob
    import time
    import os
    import redis
    start = time.time()
    inputdir = sys.argv[1]
    outputfile = sys.argv[2]
    valid_user_file = sys.argv[3]
    topdomainfile = sys.argv[4]
    seconddomainfile = sys.argv[5]
    blacklistdomainfile = sys.argv[6]

    def _first_column_set(filename):
        # Set of the first CSV column of every line.  BUGFIX: the
        # original opened these files in comprehensions and never
        # closed them; the with-block releases the handle promptly.
        with open(filename) as fobj:
            return set(line.strip().split(',')[0] for line in fobj)

    topdomain_set = _first_column_set(topdomainfile)
    seconddomain_set = _first_column_set(seconddomainfile)
    blacklistdomain_set = _first_column_set(blacklistdomainfile)
    with open(valid_user_file) as fobj:
        valid_users = set(line.strip() for line in fobj)
    split_func = split_by_5_minute
    get_domain_func = generate_get_right_domain(topdomain_set, seconddomain_set, blacklistdomain_set)
    r = redis.StrictRedis(host='localhost', port=6379, db=0)
    csvfiles = glob.glob(os.path.join(inputdir, '*', '*.csv'))
    n = len(csvfiles)
    # BUGFIX: with-block closes the output file even on error (original
    # used bare open()/close()).
    with open(outputfile, 'w') as outputobj:
        for idx, csvfile in enumerate(csvfiles):
            uid = get_uid(csvfile)
            if uid not in valid_users:
                continue
            f = load_file(r, csvfile)
            try:
                counter = app_and_category_statistic(f, get_domain_func, split_func)
            finally:
                # BUGFIX: close the redis-backed file object even if
                # the statistic computation raises.
                f.close()
            save_to_csv(outputobj, uid, counter)
            logging.info('[%d/%d]' % (idx + 1, n))
    logging.info('finish with time %s', str(time.time() - start))
def test_get_uid(self):
    """get_uid() extracts the bare file stem as the user id."""
    expected = '0001998'
    actual = com.get_uid('/anb/0001998.csv')
    self.assertEqual(expected, actual)
for split_time, count in counter.iteritems(): writer.writerow({'uid': uid, 'time': split_time, 'request_count': count}) if __name__ == '__main__': import sys import glob import time start = time.time() inputdir = sys.argv[1] outputfile = sys.argv[2] split_type = int(sys.argv[3]) split_func = split_by_hour if split_type == 1 else split_by_halfhour csvfiles = glob.glob(os.path.join(inputdir, '*', '*.csv')) n = len(csvfiles) if os.path.isfile(outputfile): os.remove(outputfile) with open(outputfile, 'a') as outputfileobj: for idx, csvfile in enumerate(csvfiles): uid = get_uid(csvfile) with open(csvfile) as f: counter = gprs_statistic(f, split_func) save_to_csv(outputfileobj, uid, counter) logging.info('[%d/%d]' % (idx + 1, n)) logging.info('finish with time %s', str(time.time() - start))