def split(fpath, outputDir):
    # Split a (possibly gzip-compressed) request log into one output file
    # per ISO week, writing each record as a "-----" separator, the header
    # line, and the next three input lines.
    #
    # Uses module-level state defined elsewhere in this file:
    #   outputFile -- the currently open output file (or a falsy sentinel)
    #   fileCount  -- number of output files created so far
    #   lastWeek   -- week number of the previously processed record
    global outputFile, fileCount, lastWeek
    print "Splitting data for %s" % (fpath,)
    # Transparently handle gzipped input based on the file extension.
    f = GzipFile(fpath) if fpath.endswith(".gz") else open(fpath)
    for line in f:
        # Record headers begin with a 2010 date, e.g. "2010/0M/DD HH:...".
        if line.startswith("2010/0"):
            date = line[:10]
            date = date.replace("/", "")  # "2010/01/02" -> "20100102"
            hours = line[11:13]           # two-digit hour field from the timestamp
            dt = datetime.date(int(date[0:4]), int(date[4:6]), int(date[6:8]))
            currentWeek = dt.isocalendar()[1]  # ISO week number
            # Monday (weekday() == 0) up through hour "06" is attributed to
            # the previous week (lexicographic compare of the 2-char hour).
            if dt.weekday() == 0 and hours <= "06":
                currentWeek -= 1
            # Week rolled over: start a new output file named after the
            # first date seen in the new week.
            if lastWeek != currentWeek:
                if outputFile:
                    outputFile.close()
                outputFile = open(os.path.join(outputDir, "request.log.%s" % (date,)), "w")
                fileCount += 1
                lastWeek = currentWeek
                print "Changed to week of %s" % (date,)
            output = ["-----\n"]
            output.append(line)
            # Consume the three continuation lines of this record via
            # f.next(), which advances the same iterator the for-loop uses.
            try:
                output.append(f.next())
                output.append(f.next())
                output.append(f.next())
            except StopIteration:
                # Truncated record at EOF: discard it and stop.
                break
            outputFile.write("".join(output))
    f.close()
def main(path, verify=False, format='json'):
    # Locate and print the .PKGINFO member of an Arch Linux package
    # (.pkg.tar.gz or .pkg.tar.xz), either as JSON (default) or as raw
    # lines. Returns a shell-style exit code:
    #   0 = success, 1 = not a usable package, 2 = verification failed.
    code = 0
    try:
        # Choose the decompressor from the filename extension.
        if path.endswith('.pkg.tar.gz'):
            f = GzipFile(path)
        elif path.endswith('.pkg.tar.xz'):
            f = LZMAFile(path)
        else:
            print >> sys.stderr, path, 'does not look like a package file.'
            return 1
        f = TarFile(fileobj=f)
        # Scan archive members until .PKGINFO is found.
        while True:
            info = f.next()
            if info.name == '.PKGINFO':
                break
        else:
            # NOTE(review): a `while True` loop never terminates "normally",
            # so this else clause looks unreachable; if the archive lacks
            # .PKGINFO, TarFile.next() eventually returns None and
            # `info.name` raises AttributeError instead -- confirm intent.
            print >> sys.stderr, path, 'does not contain .PKGINFO'
            return 1
        if verify:
            try:
                # _load() (TarFile internal) reads the whole archive, which
                # surfaces decompression/corruption problems as IOError.
                f._load()
            except IOError:
                print >> sys.stderr, 'failed to verify', path
                code = 2
        ret = {}
        for line in f.extractfile(info).readlines():
            line = line.strip()
            # Skip blank lines and comments.
            if not line or line.startswith('#'):
                continue
            if format in ('json',):
                # Lines are "key = value"; repeated keys collect into a list.
                key, value = map(str.strip, line.split('=', 1))
                if key in ret:
                    if isinstance(ret[key], list):
                        ret[key].append(value)
                    else:
                        ret[key] = [ret[key], value]
                else:
                    ret[key] = value
            else:
                # Non-JSON format: echo the raw .PKGINFO line.
                print line
        if format in ('json',):
            print ujson.dumps(ret)
    except IOError:
        print >> sys.stderr, path, 'is not a valid package file.'
        return 1
    else:
        return code
def split(fpath, outputDir):
    # Split a (possibly gzip-compressed) request log into one output file
    # per ISO week. Variant of split() above that additionally skips a
    # "Memory" line inside each record.
    #
    # Uses module-level state defined elsewhere in this file:
    #   outputFile -- the currently open output file (or a falsy sentinel)
    #   fileCount  -- number of output files created so far
    #   lastWeek   -- week number of the previously processed record
    global outputFile, fileCount, lastWeek
    print("Splitting data for %s" % (fpath,))
    # Transparently handle gzipped input based on the file extension.
    f = GzipFile(fpath) if fpath.endswith(".gz") else open(fpath)
    for line in f:
        # Record headers begin with a 2010 date, e.g. "2010/0M/DD HH:...".
        if line.startswith("2010/0"):
            date = line[:10]
            date = date.replace("/", "")  # "2010/01/02" -> "20100102"
            hours = line[11:13]           # two-digit hour field from the timestamp
            dt = datetime.date(int(date[0:4]), int(date[4:6]), int(date[6:8]))
            currentWeek = dt.isocalendar()[1]  # ISO week number
            # Monday (weekday() == 0) up through hour "06" is attributed to
            # the previous week (lexicographic compare of the 2-char hour).
            if dt.weekday() == 0 and hours <= "06":
                currentWeek -= 1
            # Week rolled over: start a new output file named after the
            # first date seen in the new week.
            if lastWeek != currentWeek:
                if outputFile:
                    outputFile.close()
                outputFile = open(os.path.join(outputDir, "request.log.%s" % (date,)), "w")
                fileCount += 1
                lastWeek = currentWeek
                print("Changed to week of %s" % (date,))
            output = ["-----\n"]
            output.append(line)
            # Consume the record's continuation lines via f.next() (advances
            # the same iterator the for-loop uses), dropping an interleaved
            # "Memory" line if present.
            try:
                output.append(f.next())
                line = f.next()
                if line.startswith("Memory"):
                    line = f.next()
                output.append(line)
                output.append(f.next())
            except StopIteration:
                # Truncated record at EOF: discard it and stop.
                break
            outputFile.write("".join(output))
    f.close()
def parseStats(logFilePath, donormlize=True, verbose=False):
    """
    Read a (possibly gzipped) Postgres statement log, collect the
    interesting statement rows and hand them to sqlStatementsReport().

    @param logFilePath: path to the log file; "~" is expanded and a
        ".gz" suffix selects gzip decompression.
    @param donormlize: when true, normalize each statement with
        sqlnormalize() before filtering/recording it.
    @param verbose: when true, print progress every 1000 entries.
    """
    fpath = os.path.expanduser(logFilePath)
    if fpath.endswith(".gz"):
        f = GzipFile(fpath)
    else:
        f = open(fpath)
    entries = []
    # FIX: the original called f.close() right after the header-skipping
    # loop and then kept iterating the closed file, which raises
    # ValueError. The file must stay open until all reading is done; the
    # try/finally guarantees it is closed even on error.
    try:
        # Punt past data: skip the header up to the "---" separator line.
        for line in f:
            if line.startswith("---"):
                break
        for line in f:
            bits = line.split("|")
            if len(bits) > COLUMN_query:
                # A trailing "+" means the query continues on the next
                # input line; f.next() advances the same iterator the
                # for-loop uses.
                while bits[COLUMN_query].endswith("+"):
                    line = f.next()
                    newbits = line.split("|")
                    bits[COLUMN_query] = bits[COLUMN_query][:-1] + newbits[COLUMN_query]
                # Truncate inline calendar payloads from the query text.
                pos = bits[COLUMN_query].find("BEGIN:VCALENDAR")
                if pos != -1:
                    bits[COLUMN_query] = bits[COLUMN_query][:pos]
                if donormlize:
                    bits[COLUMN_query] = sqlnormalize(bits[COLUMN_query].strip())
                # Ignore transaction control and pg_catalog queries.
                if bits[COLUMN_query] not in (
                    "BEGIN",
                    "COMMIT",
                    "ROLLBACK",
                ) and bits[COLUMN_query].find("pg_catalog") == -1:
                    bits = [bit.strip() for bit in bits]
                    entries.append(bits)
                    if verbose and divmod(len(entries), 1000)[1] == 0:
                        print("%d entries" % (len(entries),))
                    # if float(bits[COLUMN_total_time]) > 1:
                    #     print(bits[COLUMN_total_time], bits[COLUMN_query])
    finally:
        f.close()
    if verbose:
        print("Read %d entries" % (len(entries),))
    sqlStatementsReport(entries)
def parseStats(logFilePath, donormlize=True, verbose=False):
    """
    Read a (possibly gzipped) Postgres statement log, collect the
    interesting statement rows and hand them to sqlStatementsReport().

    @param logFilePath: path to the log file; "~" is expanded and a
        ".gz" suffix selects gzip decompression.
    @param donormlize: when true, normalize each statement with
        sqlnormalize() before filtering/recording it.
    @param verbose: when true, print progress every 1000 entries.
    """
    fpath = os.path.expanduser(logFilePath)
    if fpath.endswith(".gz"):
        f = GzipFile(fpath)
    else:
        f = open(fpath)
    entries = []
    # FIX: the original never closed the log file (resource leak); the
    # try/finally guarantees it is closed even if parsing raises.
    try:
        # Punt past data: skip the header up to the "---" separator line.
        for line in f:
            if line.startswith("---"):
                break
        for line in f:
            bits = line.split("|")
            if len(bits) > COLUMN_query:
                # A trailing "+" means the query continues on the next
                # input line; f.next() advances the same iterator the
                # for-loop uses.
                while bits[COLUMN_query].endswith("+"):
                    line = f.next()
                    newbits = line.split("|")
                    bits[COLUMN_query] = bits[COLUMN_query][:-1] + newbits[COLUMN_query]
                # Truncate inline calendar payloads from the query text.
                pos = bits[COLUMN_query].find("BEGIN:VCALENDAR")
                if pos != -1:
                    bits[COLUMN_query] = bits[COLUMN_query][:pos]
                if donormlize:
                    bits[COLUMN_query] = sqlnormalize(bits[COLUMN_query].strip())
                # Ignore transaction control and pg_catalog queries.
                if bits[COLUMN_query] not in (
                    "BEGIN",
                    "COMMIT",
                    "ROLLBACK",
                ) and bits[COLUMN_query].find("pg_catalog") == -1:
                    bits = [bit.strip() for bit in bits]
                    entries.append(bits)
                    if verbose and divmod(len(entries), 1000)[1] == 0:
                        print("%d entries" % (len(entries),))
                    # if float(bits[COLUMN_total_time]) > 1:
                    #     print(bits[COLUMN_total_time], bits[COLUMN_query])
    finally:
        f.close()
    if verbose:
        print("Read %d entries" % (len(entries),))
    sqlStatementsReport(entries)
#exit() #for log_path in glob(input_dir + '/*.*'): #if 0: base, ext = splitext(key.name) if ext == '.gz': fp = GzipFile(output_dir + key.name, 'r') name = base else: print 'file is not gzipped...' #print name fp.next() fp.next() input_rows = csv.reader(fp, dialect=csv.excel_tab) for row in input_rows: #print row try: uri = row[7].split('/') basemap = uri[1] x = int(uri[3]) image_file = uri[4].split('.') y = int(image_file[0])