def get_failed_pages(result_log_dir='logs/tests/', page_dir='allpages/'):
    """Yield (page file, link) pairs for every link that failed to trace."""
    n = 0
    files = utility.get_files(result_log_dir)
    total_lines = utility.lines_in_dir(result_log_dir)
    start_time = time.time()
    for log_fname in files:
        with open(result_log_dir + log_fname, 'r') as f:
            results = f.read().split('\n')
        # Each line should be a dictionary containing the filename and the results.
        results = filter(lambda i: len(i) > 0, results)
        results = map(ast.literal_eval, results)
        for fname, fresults in map(lambda d: (d['fname'], d['result']), results):
            utility.show_bar(n, total_lines,
                             message='Checking fails ({} of {}): '.format(n, total_lines),
                             start_time=start_time)
            n += 1
            try:
                with open(page_dir + fname, 'r') as f:
                    lines = f.read().split('\n')
                # Pair each link in the page file with its recorded result.
                for line, result in zip(lines, fresults):
                    if not result and line != '/wiki/Main_Page':
                        yield (fname, line)
            except IOError:
                print('File \'{}\' not found.'.format(fname))
    print('')
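# Usage sketch (hypothetical, not part of the original module): drain the
# generator into a list of (page file, failed link) pairs for re-queueing.
# Assumes ast, time, and utility are imported at module level as elsewhere.
def print_failed_summary():
    failed = list(get_failed_pages())
    print('{} failed pages found.'.format(len(failed)))
    for fname, link in failed[:10]:
        print('{}: {}'.format(fname, link))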
def get_netStats(self):
    """Collect one row per *_net_stats.json file that reported status OK."""
    rows = []
    cls_attributes = util.get_cls_attributes(meta_db.NetStats)
    for fname in util.get_files(self.pattern + '*_net_stats.json'):
        # Renamed from 'json' to avoid shadowing the stdlib module.
        stats_json = util.get_json_from_file(fname, self.is2move)
        if stats_json and stats_json['status'] == 'OK' and stats_json['data']:
            stats_json['data']['file_timestamp'] = util.get_timestamp_from_file(fname)
            rows.append(self._get_dict(stats_json['data'], cls_attributes))
    return rows
def get_tx(self):
    """Collect every pending transaction from the *pending_txs.json files."""
    txs = []
    cls_attributes = util.get_cls_attributes(meta_db.Transaction)
    for fname in util.get_files(self.pattern + '*pending_txs.json'):
        txs_json = util.get_json_from_file(fname)
        if txs_json:
            for tx in txs_json:
                tx['file_timestamp'] = util.get_timestamp_from_file(fname)
                txs.append(self._get_dict(tx, cls_attributes))
    return txs
def _get_rows(self, table, file_extension):
    """Collect one row per JSON file matching file_extension, shaped for table."""
    rows = []
    cls_attributes = util.get_cls_attributes(table)
    for fname in util.get_files(self.pattern + file_extension):
        row_json = util.get_json_from_file(fname, self.is2move)
        if row_json:
            row_json['file_timestamp'] = util.get_timestamp_from_file(fname)
            rows.append(self._get_dict(row_json, cls_attributes))
    return rows
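# Usage sketch (hypothetical): _get_rows generalizes the collectors above.
# 'loader' stands for an instance of the surrounding class; the table and
# file extension below are borrowed from get_netStats purely for illustration.
def collect_rows_example(loader):
    rows = loader._get_rows(meta_db.NetStats, '*_net_stats.json')
    print('Collected {} rows.'.format(len(rows)))
    return rows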
def __init__(self, host, port, nowrite=False, noupdate=False, target='Philosophy'):
    self.host = host
    self.port = port
    self.nowrite = nowrite
    self.noupdate = noupdate
    self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self.working_file = ''
    self.Tracer = wikipedia.WikipediaTracer()
    self.Tracer.set_target(target)
    self.id = socket.gethostname()
    self.work_file = 'worker/{}-workfile.txt'.format(self.id)
    self.cache_file = 'worker/{}-cachefile.txt'.format(self.id)
    self.log_dir = 'worker/{}/'.format(self.id)
    self.fname = ''  # The serverside name of the file we're working on
    self.results = []
    self.f = None
    # Create the worker directories if they don't already exist.
    try:
        os.mkdir(self.work_file.split('/')[0] + '/')
    except OSError:
        pass
    try:
        os.mkdir(self.log_dir)
    except OSError:
        pass
    # Merge any cache files left over from previous runs on this machine.
    cache_files = utility.get_files('worker/', search_re=r'cachefile')
    cache_files = ['worker/' + fname for fname in cache_files]
    self.Tracer.mergeCaches(cache_files, new_name=self.cache_file)
    # In case we've run it with this computer before.
    self.Tracer.articles = utility.lines_in_dir(self.log_dir)
def __init__(self, host='localhost', port=60000, directory='allpages/',
             finished_directory='completed/', temp_directory='temp/',
             verify_directory='verify/', mode='new'):
    self.host = host
    self.port = port
    self.directory = directory
    self.finished_directory = finished_directory
    self.temp_directory = temp_directory
    self.verify_directory = verify_directory
    self.files = []
    self.verify_files = []
    self.in_use_files = []
    self.finished_files = []
    self.clients = {}
    self.client_stats = {}
    try:
        os.mkdir('logs/tests/')
    except OSError:
        pass
    if mode == 'new':
        # Fresh run: copy every page file into the temp working directory.
        self.files = utility.get_files(self.directory)
        for dirname in (self.finished_directory, self.temp_directory,
                        self.verify_directory):
            try:
                os.mkdir(dirname)
            except OSError:
                pass
        for i, f in enumerate(self.files):
            utility.show_bar(i, len(self.files), number_limit=True,
                             message='Copying to {}: '.format(self.temp_directory))
            shutil.copy(self.directory + f, self.temp_directory + f)
        print('')
    elif mode == 'continue':
        # Resume: pick up whatever is still in the temp and finished directories.
        print('Loading temp files.')
        self.files = utility.get_files(self.temp_directory)
        print('Loading finished files.')
        self.finished_files = utility.get_files(self.finished_directory)
    elif mode == 'update':
        # Update: copy over any source files we haven't seen before.
        self.files = utility.get_files(self.temp_directory)
        self.finished_files = utility.get_files(self.finished_directory)
        all_files = self.files + self.finished_files
        check_files = utility.get_files(self.directory)
        for i, fname in enumerate(check_files):
            utility.show_bar(i, len(check_files), message='Updating files: ')
            if fname not in all_files:
                shutil.copy(self.directory + fname, self.temp_directory + fname)
        print('')
        self.files = utility.get_files(self.temp_directory)
    # We should probably verify all the error files at some point.
    self.start_time = time.time()
    self.finished_since_start = 0

    def get_next_file(client):
        if client not in self.clients:
            self.clients[client] = []
            self.client_stats[client] = {}
        # Hand out the first file nobody is currently working on.
        for fname in self.verify_files + self.files:
            if fname not in self.in_use_files:
                self.in_use_files.append(fname)
                self.clients[client].append(fname)
                return fname

    def finish_file(client, fname, result):
        self.files.remove(fname)
        self.in_use_files.remove(fname)
        self.finished_files.append((fname, result))
        self.clients[client].remove(fname)
        if 'finished' in self.client_stats[client]:
            self.client_stats[client]['finished'] += 1
        else:
            self.client_stats[client]['finished'] = 1
        shutil.move(self.temp_directory + fname, self.finished_directory + fname)
        self.finished_since_start += 1
        # Estimate the remaining time from the average pace since startup.
        elapsed = time.time() - self.start_time
        estimated_remaining = elapsed / self.finished_since_start * len(self.files)
        print('Finished {} files so far. {} remaining.'.format(
            len(self.finished_files), utility.display_time(estimated_remaining)))
        utility.show_dict(self.client_stats)
        self.write_results_to_file('logs/tests/', fname, result)

    self.server = SocketServer.TCPServer((self.host, self.port), WikiClientHandler)
    # So the handlers can interact with us.
    self.server.get_next_file = get_next_file
    self.server.finish_file = finish_file
    self.server.directory = directory
    self.server.finished_directory = finished_directory
    self.server.temp_directory = temp_directory
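# Usage sketch (hypothetical): 'WikiServer' is a stand-in for the surrounding
# class, whose name is not shown in this excerpt. 'new' mode copies every page
# into the temp directory before handing files out to workers over TCP.
server = WikiServer(host='localhost', port=60000, mode='new')
server.server.serve_forever()  # blocks, dispatching to WikiClientHandler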
labels.txt: includes all the activity labels available for the dataset (one per row).
    Column 1: experiment number ID
    Column 2: user number ID
    Column 3: activity number ID
    Column 4: label start point (in number of signal log samples, recorded at 50Hz)
    Column 5: label end point (in number of signal log samples)

Output: .csv files containing data at 5Hz, 10Hz, 25Hz and 50Hz with labels
    [experimentID userID activityID tBodyAcc-X tBodyAcc-Y tBodyAcc-Z
     tGravityAcc-X tGravityAcc-Y tGravityAcc-Z tBodyGyro-X tBodyGyro-Y]

Time domain signals are prefixed with 't' to denote time.
"""

# Get the raw data files. Raw strings keep the Windows backslashes intact.
path_to_raw_data_files = r"C:\Users\sri01\Downloads\DS 6999\HAPT Data Set\RawData\*.txt"
files = ut.get_files(path_to_raw_data_files)

# This is the directory where you want to write the new csvs to.
path_to_output_directory = r"C:\Users\sri01\Downloads\DS 6999\HAPT Processed2"
ut.set_output_path(path_to_output_directory)


# Get the data from the labels file.
def get_label_data(files):
    label_file = None
    for fname in files:
        if 'labels' in fname:
            label_file = fname
            break
    if label_file is None:
        raise IOError('No labels file found among the raw data files.')
    return ut.get_data(label_file)
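# Usage sketch (hypothetical): fetch the label rows once and reuse them when
# processing each experiment's raw signal files. Assumes ut.get_data returns
# a sliceable row structure (e.g. a list of rows or a DataFrame).
label_data = get_label_data(files)
print(label_data[:5])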