def upload_log_file(bucket, local_path, force=False):
    start_time = time.time()
    with open(local_path, 'rb') as f:
        # The log header determines the channel and date, which together
        # determine the S3 key for this file.
        try:
            log_data = ircloglib.parse_header(f.readline())
        except ircloglib.ParsingError as e:
            raise UploadError('Error uploading %s: %s' % (local_path, e))
        f.seek(0)
        remote_path = 'rawlogs/%s/%s/%02d.%02d' % (log_data.channel,
                                                   log_data.start_time.year,
                                                   log_data.start_time.month,
                                                   log_data.start_time.day)
        if not force:
            # Skip the upload if S3 already has a copy at least as new as
            # the local file.
            if sirc.util.s3.key_exists(bucket, remote_path):
                remote_timestamp = sirc.util.s3.cached_get_mtime(bucket,
                                                                 remote_path)
                local_timestamp = file_mtime(local_path)
                if remote_timestamp >= local_timestamp:
                    # print 'Skipping %s' % (local_path,)
                    return
        key = boto.s3.key.Key(bucket)
        key.key = remote_path
        sys.stdout.write('%s -> s3://%s/%s: ' % (local_path, bucket.name,
                                                 key.key))
        sys.stdout.flush()
        key.set_contents_from_file(f, cb=upload_callback, num_cb=10)
def parse_log_path(path):
    if path.startswith(S3_PATH_PREFIX):
        return metadata_from_s3path(path)
    else:
        with open(path, 'rb') as f:
            data = ircloglib.parse_header(f.readline())
        data.path = path
        return data
def index_records_for_document(doc):
    first_line = doc.file.readline()
    log_data = ircloglib.parse_header(first_line)
    log_data.path = doc.file.name
    # Emit one record for the day as a whole, then one record per log line
    # that transforms into something indexable.
    r = index_record_for_day(
        log_data,
        datetime.datetime.utcnow().replace(tzinfo=g_utc))
    yield r
    line_num = 0
    line = doc.file.readline()
    while line != '':
        xformed = index_record_for_line(log_data, line, line_num)
        line_num += 1
        line = doc.file.readline()
        if xformed:
            yield xformed
def get_fs_document(doc_path):
    fp = open(doc_path, 'rb')
    log_data = ircloglib.parse_header(fp.readline())
    fp.seek(0)
    return Document(log_data, fp)
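
# Illustrative usage sketch, not part of the original module: index a single
# local log file by chaining get_fs_document() with
# index_records_for_document(). The path below is hypothetical and only
# mirrors the rawlogs/<channel>/<year>/<month>.<day> layout used above.
if __name__ == '__main__':
    import pprint
    doc = get_fs_document('/var/irclogs/examplechannel/2011/01.01')
    for record in index_records_for_document(doc):
        pprint.pprint(record)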