コード例 #1
0
def reformat(filename):
    parser = TranscriptParser(filename)
    for chunk in parser.get_chunks():
        timestamp = seconds_to_timestamp(chunk['timestamp'])
        for line in chunk['lines']:
            print "%s\t%s:\t %s" % (
                timestamp,
                line['speaker'],
                line['text'],
            )
コード例 #2
0
ファイル: indexer.py プロジェクト: niksbiks/Spacelog
    def index(self):
        """
        Index this transcript into Redis.

        Walks the parsed chunks in file order and, for each one, writes:
        an info hash (offset, page, act, UTC time, optional transcript
        page / language / note / key scene), doubly-linked previous/next
        pointers between log lines, the raw text lines, any images,
        speaker and label sets, page and transcript-page indexes,
        glossary mention counters, and a search-index entry.

        Raises an Exception if chunk timestamps are not strictly
        increasing.
        """
        # Mutable state threaded through the chunk loop.
        current_labels = {}  # label name -> timestamp at which the label expires
        current_transcript_page = None
        current_page = 1
        current_page_lines = 0
        current_lang = None
        last_act = None
        previous_log_line_id = None
        previous_timestamp = None
        launch_time = int(
            self.redis_conn.hget("mission:%s" % self.mission_name,
                                 "utc_launch_time"))
        acts = list(Act.Query(self.redis_conn, self.mission_name))
        key_scenes = list(KeyScene.Query(self.redis_conn, self.mission_name))
        # Lowercased identifiers so the glossary matching below is
        # case-insensitive.
        glossary_items = dict([
            (item.identifier.lower(), item)
            for item in Glossary.Query(self.redis_conn, self.mission_name)
        ])
        for chunk in self.parser.get_chunks():
            timestamp = chunk['timestamp']
            log_line_id = "%s:%i" % (self.transcript_name, timestamp)
            # Enforce strictly increasing timestamps.  (On the first
            # chunk previous_timestamp is None, which compares less than
            # any int under Python 2, so the check passes.)
            if timestamp <= previous_timestamp:
                raise Exception, "%s should be after %s" % (
                    seconds_to_timestamp(timestamp),
                    seconds_to_timestamp(previous_timestamp))
            # See if there's transcript page info, and update it if so
            if chunk['meta'].get('_page', 0):
                current_transcript_page = int(chunk["meta"]['_page'])
            if chunk['meta'].get('_lang', None):
                current_lang = chunk['meta']['_lang']
            if current_transcript_page:
                self.redis_conn.set("log_line:%s:page" % log_line_id,
                                    current_transcript_page)
            # Look up the act
            for act in acts:
                if act.includes(timestamp):
                    break
            else:
                # No act covers this timestamp; skip the chunk entirely.
                print "Error: No act for timestamp %s" % seconds_to_timestamp(
                    timestamp)
                continue
            # If we've filled up the current page, go to a new one
            if current_page_lines >= self.LINES_PER_PAGE or (
                    last_act is not None and last_act != act):
                current_page += 1
                current_page_lines = 0
            last_act = act
            # First, create a record with some useful information
            info_key = "log_line:%s:info" % log_line_id
            info_record = {
                "offset": chunk['offset'],
                "page": current_page,
                "act": act.number,
                "utc_time": launch_time + timestamp,
            }
            if current_transcript_page:
                info_record["transcript_page"] = current_transcript_page
            if current_lang:
                info_record["lang"] = current_lang
            # And an editorial note if present
            if '_note' in chunk['meta']:
                info_record["note"] = chunk['meta']['_note']

            self.redis_conn.hmset(
                info_key,
                info_record,
            )
            # Look up the key scene
            for key_scene in key_scenes:
                if key_scene.includes(timestamp):
                    self.redis_conn.hset(info_key, 'key_scene',
                                         key_scene.number)
                    break
            # Create the doubly-linked list structure
            if previous_log_line_id:
                self.redis_conn.hset(
                    info_key,
                    "previous",
                    previous_log_line_id,
                )
                self.redis_conn.hset(
                    "log_line:%s:info" % previous_log_line_id,
                    "next",
                    log_line_id,
                )
            previous_log_line_id = log_line_id
            previous_timestamp = timestamp
            # Also store the text
            text = u""
            for line in chunk['lines']:
                self.redis_conn.rpush(
                    "log_line:%s:lines" % log_line_id,
                    u"%(speaker)s: %(text)s" % line,
                )
                # Accumulate plain text for the glossary scan below.
                text += "%s %s" % (line['speaker'], line['text'])
            # Store any images
            for i, image in enumerate(chunk['meta'].get("_images", [])):
                # Make the image id
                image_id = "%s:%s" % (log_line_id, i)
                # Push it onto the images list
                self.redis_conn.rpush(
                    "log_line:%s:images" % log_line_id,
                    image_id,
                )
                # Store the image data
                self.redis_conn.hmset(
                    "image:%s" % image_id,
                    image,
                )
            # Add that logline ID for the people involved
            speakers = set([line['speaker'] for line in chunk['lines']])
            for speaker in speakers:
                self.redis_conn.sadd("speaker:%s" % speaker, log_line_id)
            # Add it to the index for this page
            self.redis_conn.rpush(
                "page:%s:%i" % (self.transcript_name, current_page),
                log_line_id)
            # Add it to the index for this transcript page
            self.redis_conn.rpush(
                "transcript_page:%s:%s" %
                (self.transcript_name, current_transcript_page), log_line_id)
            # Add it into the transcript and everything sets
            # NOTE(review): member-then-score argument order matches the
            # pre-3.0 redis-py zadd() signature -- confirm client version.
            self.redis_conn.zadd("log_lines:%s" % self.mission_name,
                                 log_line_id, chunk['timestamp'])
            self.redis_conn.zadd("transcript:%s" % self.transcript_name,
                                 log_line_id, chunk['timestamp'])
            # Read the new labels into current_labels
            has_labels = False
            if '_labels' in chunk['meta']:
                for label, endpoint in chunk['meta']['_labels'].items():
                    # A label with an endpoint stays active until that
                    # timestamp; one without (endpoint None) applies to
                    # this log line only.
                    if endpoint is not None and label not in current_labels:
                        current_labels[label] = endpoint
                    elif label in current_labels:
                        current_labels[label] = max(current_labels[label],
                                                    endpoint)
                    elif endpoint is None:
                        self.redis_conn.sadd("label:%s" % label, log_line_id)
                        has_labels = True
            # Expire any old labels
            # (Deleting while looping is safe: Python 2 items() returns
            # a list snapshot, not a view.)
            for label, endpoint in current_labels.items():
                if endpoint < chunk['timestamp']:
                    del current_labels[label]
            # Apply any surviving labels
            for label in current_labels:
                self.redis_conn.sadd("label:%s" % label, log_line_id)
                has_labels = True
            # And add this logline to search index
            if has_labels:
                print "weight = 3 for %s" % log_line_id
                weight = 3.0  # magic!
            else:
                weight = 1.0
            self.add_to_search_index(
                mission=self.mission_name,
                id=log_line_id,
                chunk=chunk,
                weight=weight,
                timestamp=timestamp,
            )
            # For any mentioned glossary terms, add to them.
            for word in text.split():
                word = word.strip(",;-:'\"").lower()
                if word in glossary_items:
                    glossary_item = glossary_items[word]
                    self.redis_conn.hincrby(
                        "glossary:%s" % glossary_item.id,
                        "times_mentioned",
                        1,
                    )
            # Increment the number of log lines we've done
            current_page_lines += len(chunk['lines'])
        # Record this transcript's final original-page count, once only.
        pages_set = self.redis_conn.hexists("pages:%s" % self.mission_name,
                                            self.transcript_name)
        if not pages_set and current_transcript_page:
            print "%s original pages: %d" % (self.transcript_name,
                                             current_transcript_page)
            self.redis_conn.hset("pages:%s" % self.mission_name,
                                 self.transcript_name, current_transcript_page)
コード例 #3
0
ファイル: indexer.py プロジェクト: javimp89/Spacelog
    def index(self):
        """
        Index this transcript into Redis.

        Walks the parsed chunks in file order and, for each log line,
        writes its info hash (offset, page, act, UTC time, optional
        transcript page / key scene), previous/next pointers, raw text
        lines, images, speaker and label sets, page indexes, glossary
        mention counters, and a search-index entry.
        """
        current_labels = {}  # label name -> timestamp at which the label expires
        current_transcript_page = None
        current_page = 1
        current_page_lines = 0
        last_act = None
        previous_log_line_id = None
        launch_time = int(self.redis_conn.hget("mission:%s" % self.mission_name, "utc_launch_time"))
        acts = list(Act.Query(self.redis_conn, self.mission_name))
        key_scenes = list(KeyScene.Query(self.redis_conn, self.mission_name))
        # Lowercased identifiers so the glossary matching below is
        # case-insensitive.
        glossary_items = dict([
            (item.identifier.lower(), item) for item in
            Glossary.Query(self.redis_conn, self.mission_name)
        ])
        for chunk in self.parser.get_chunks():
            timestamp = chunk['timestamp']
            log_line_id = "%s:%i" % (self.transcript_name, timestamp)
            # See if there's transcript page info, and update it if so
            if chunk['meta'].get('_page', 0):
                current_transcript_page = int(chunk["meta"]['_page'])
            if current_transcript_page:
                self.redis_conn.set("log_line:%s:page" % log_line_id, current_transcript_page)
            # Look up the act
            for act in acts:
                if act.includes(timestamp):
                    break
            else:
                # No act covers this timestamp; skip the chunk entirely.
                print "Error: No act for timestamp %s" % seconds_to_timestamp(timestamp)
                continue
            # If we've filled up the current page, go to a new one
            if current_page_lines >= self.LINES_PER_PAGE or (last_act is not None and last_act != act):
                current_page += 1
                current_page_lines = 0
            last_act = act
            # First, create a record with some useful information
            info_key = "log_line:%s:info" % log_line_id
            info_record = {
                "offset": chunk['offset'],
                "page": current_page,
                "act": act.number,
                "utc_time": launch_time + timestamp,
            }
            if current_transcript_page:
                info_record["transcript_page"] = current_transcript_page

            self.redis_conn.hmset(
                info_key,
                info_record,
            )
            # Look up the key scene
            for key_scene in key_scenes:
                if key_scene.includes(timestamp):
                    self.redis_conn.hset(info_key, 'key_scene', key_scene.number)
                    break
            # Create the doubly-linked list structure
            if previous_log_line_id:
                self.redis_conn.hset(
                    info_key,
                    "previous",
                    previous_log_line_id,
                )
                self.redis_conn.hset(
                    "log_line:%s:info" % previous_log_line_id,
                    "next",
                    log_line_id,
                )
            previous_log_line_id = log_line_id
            # Also store the text
            text = ""
            for line in chunk['lines']:
                self.redis_conn.rpush(
                    "log_line:%s:lines" % log_line_id,
                    "%(speaker)s: %(text)s" % line,
                )
                # Accumulate plain text for the glossary scan below.
                text += "%s %s" % (line['speaker'], line['text'])
            # Store any images
            for i, image in enumerate(chunk['meta'].get("_images", [])):
                # Make the image id
                image_id = "%s:%s" % (log_line_id, i)
                # Push it onto the images list
                self.redis_conn.rpush(
                    "log_line:%s:images" % log_line_id,
                    image_id,
                )
                # Store the image data
                self.redis_conn.hmset(
                    "image:%s" % image_id,
                    image,
                )
            # Add that logline ID for the people involved
            speakers = set([ line['speaker'] for line in chunk['lines'] ])
            for speaker in speakers:
                self.redis_conn.sadd("speaker:%s" % speaker, log_line_id)
            # Add it to the index for this page
            self.redis_conn.rpush("page:%s:%i" % (self.transcript_name, current_page), log_line_id)
            # Add it into the transcript and everything sets
            # NOTE(review): member-then-score argument order matches the
            # pre-3.0 redis-py zadd() signature -- confirm client version.
            self.redis_conn.zadd("log_lines:%s" % self.mission_name, log_line_id, chunk['timestamp'])
            self.redis_conn.zadd("transcript:%s" % self.transcript_name, log_line_id, chunk['timestamp'])
            # Read the new labels into current_labels
            has_labels = False
            if '_labels' in chunk['meta']:
                for label, endpoint in chunk['meta']['_labels'].items():
                    # A label with an endpoint stays active until that
                    # timestamp; one without (endpoint None) applies to
                    # this log line only.
                    if endpoint is not None and label not in current_labels:
                        current_labels[label] = endpoint
                    elif label in current_labels:
                        current_labels[label] = max(
                            current_labels[label],
                            endpoint
                        )
                    elif endpoint is None:
                        self.redis_conn.sadd("label:%s" % label, log_line_id)
                        has_labels = True
            # Expire any old labels
            # (Deleting while looping is safe: Python 2 items() returns
            # a list snapshot, not a view.)
            for label, endpoint in current_labels.items():
                if endpoint < chunk['timestamp']:
                    del current_labels[label]
            # Apply any surviving labels
            for label in current_labels:
                self.redis_conn.sadd("label:%s" % label, log_line_id)
                has_labels = True
            # And add this logline to search index
            if has_labels:
                print "weight = 3 for %s" % log_line_id
                weight = 3.0 # magic!
            else:
                weight = 1.0
            self.add_to_search_index(
                mission=self.mission_name,
                id=log_line_id,
                lines = chunk['lines'],
                weight=weight,
                timestamp=timestamp,
            )
            # For any mentioned glossary terms, add to them.
            for word in text.split():
                word = word.strip(",;-:'\"").lower()
                if word in glossary_items:
                    glossary_item = glossary_items[word]
                    self.redis_conn.hincrby(
                        "glossary:%s" % glossary_item.id,
                        "times_mentioned",
                        1,
                    )
            # Increment the number of log lines we've done
            current_page_lines += len(chunk['lines'])
コード例 #4
0
ファイル: parser.py プロジェクト: kidfiction/Spacelog
 def get_chunks(self, offset=0):
     """
     Reads the log lines from the file in order and yields them.

     Each yielded chunk is a dict with keys "timestamp" (seconds),
     "lines" (list of {"speaker", "text"} dicts), "meta" (parsed
     metadata values keyed by the _name headers), and "offset" (the
     position of the chunk's timestamp header within the file).

     offset: position to start reading from; advanced by len() of each
     raw line as it is consumed.
     """
     current_chunk = None
     reuse_line = None  # one-line pushback buffer filled by the metadata reader
     lines = iter(self.get_lines(offset))
     # An iterator object is always truthy, so this is effectively an
     # endless loop terminated by the StopIteration break below.
     while lines or reuse_line:
         # If there's a line to reuse, use that, else read a new
         # line from the file.
         if reuse_line:
             line = reuse_line
             reuse_line = None
         else:
             try:
                 line = lines.next()
             except StopIteration:
                 break
             # NOTE(review): offset is advanced by the raw line length,
             # then the line is decoded; the header's "offset" below
             # subtracts len() of the *decoded* line, which differs from
             # the byte length for non-ASCII input -- confirm transcripts
             # are ASCII-safe.
             offset += len(line)
             line = line.decode("utf8")
         # If it's a comment or empty line, ignore it.
         if not line.strip() or line.strip()[0] == "#":
             continue
         # If it's a timestamp header, make a new chunk object.
         elif line[0] == "[":
             # Read the timestamp
             try:
                 timestamp = int(line[1:].split("]")[0])
             except ValueError:
                 # Not plain seconds; fall back to the h:m:s style form.
                 try:
                     timestamp = timestamp_to_seconds(line[1:].split("]")[0])
                 except ValueError:
                     print "Error: invalid timestamp %s" % (line[1:], )
                     raise
             if current_chunk:
                 yield current_chunk
             # Start a new log line item
             current_chunk = {
                 "timestamp": timestamp,
                 "lines": [],
                 "meta": {},
                 "offset": offset - len(line),
             }
         # If it's metadata, read the entire thing.
         elif line[0] == "_":
             # Meta item
             name, blob = line.split(":", 1)
             # Consume indented continuation lines into the blob; the
             # first non-indented line belongs to the next record, so it
             # is pushed back via reuse_line.
             while True:
                 try:
                     line = lines.next()
                 except StopIteration:
                     break
                 offset += len(line)
                 line = line.decode("utf8")
                 if not line.strip() or line.strip()[0] == "#":
                     continue
                 if line[0] in string.whitespace:
                     blob += line
                 else:
                     reuse_line = line
                     break
             # Parse the blob
             blob = blob.strip()
             if blob:
                 try:
                     data = json.loads(blob)
                 except ValueError:
                     # Fall back to treating the blob as a bare JSON string.
                     try:
                         data = json.loads('"%s"' % blob)
                     except ValueError:
                         print "Error: Invalid json at timestamp %s, key %s" % \
                                         (seconds_to_timestamp(timestamp), name)
                         continue
                 current_chunk['meta'][name.strip()] = data
         # If it's a continuation, append to the current line
         elif line[0] in string.whitespace:
             # Continuation line
             if not current_chunk:
                 print "Error: Continuation line before first timestamp header. Line: %s" % \
                                                                     (line)
             elif not current_chunk['lines']:
                 print "Error: Continuation line before first speaker name."
             else:
                 current_chunk['lines'][-1]['text'] += " " + line.strip()
         # If it's a new line, start a new line. Shock.
         else:
             # New line of speech
             try:
                 speaker, text = line.split(":", 1)
             except ValueError:
                 print "Error: First speaker line not in Name: Text format: %s." % (line,)
             else:
                 # NOTE(review): if a speech line precedes the first
                 # timestamp header, current_chunk is still None and the
                 # append below raises -- confirm input always starts
                 # with a header.
                 line = {
                     "speaker": speaker.strip(),
                     "text": text.strip(),
                 }
                 current_chunk['lines'].append(line)
     # Finally, if there's one last chunk, yield it.
     if current_chunk:
         yield current_chunk
コード例 #5
0
ファイル: stats_porn.py プロジェクト: MoriTanosuke/Spacelog
    def build_mission(self, mission):
        """
        Build the line-frequency bar graph and HTML image map for every
        act of *mission*.

        Renders the graph with ImageMagick (`convert`, then `composite`
        for key-scene markers) into image_output_path, and stores the
        image-map markup in Redis under act:<mission>:<act>:stats.
        """
        print "Building data visualisations for %s..." % mission.name
        for act in list(Act.Query(self.redis_conn, mission.name)):
            print ' ... %s' % act.title

            # Split the act into sections, one for each bar on the graph
            act_duration = act.end - act.start
            # NOTE(review): an act shorter than 92 seconds gives
            # section_duration == 0, and the while loop below would then
            # never advance t -- confirm acts are always long enough.
            section_duration = act_duration // 92

            # Count the number of log lines in each segment
            # and find the maximum number of log lines in a segment
            t = act.start
            segment_line_counts = []
            max_line_count = 0
            real_output_path = self.image_output_path % mission.name
            while t < act.end:
                # Load log lines for this segment
                query = LogLine.Query(self.redis_conn, mission.name).transcript(mission.main_transcript).range(t, t+section_duration)
                line_count = len(list(query))
                # Store segment stats
                max_line_count = max(line_count, max_line_count)
                segment_line_counts.append((t, t+section_duration, line_count))
                t += section_duration

            # Make sure we have an output directory and work out where to write the image
            # (best-effort: OSError is ignored, e.g. when it already exists)
            try:
                os.makedirs(real_output_path)
            except OSError:
                pass
            graph_file = 'graph_%s_%s.png' % (mission.name, act.number)
            output_path = '%s/%s' % (real_output_path, graph_file)

            # Add initial draw command
            draw_commands = [
                'convert', self.graph_background_file,
                '-fill', self.graph_bar_colour,
            ]

            # Add initial image map tags
            image_map_id = '%s_%s_frequency_graph' % (mission.name, act.number)
            image_map = ['<map id="%s" name="%s">' % (image_map_id, image_map_id)]

            # Iterate over the segments and add them to the draw commands and image map
            for i, line in enumerate(segment_line_counts):
                start, end, count = line
                # Bar height scaled to the busiest segment; max(..., 1)
                # avoids division by zero when every segment is empty.
                height = int(round(count / float(max(max_line_count, 1)) * self.max_bar_height))

                bar_width = 6
                bar_spacing = 4

                top_left_x     = i * (bar_width + bar_spacing) + 2
                top_left_y     = self.max_bar_height - height + 14
                bottom_right_x = top_left_x + bar_width
                bottom_right_y = self.max_bar_height + 14

                draw_commands.append('-draw')
                draw_commands.append('rectangle %s,%s,%s,%s' % (top_left_x, top_left_y, bottom_right_x, bottom_right_y))

                # Only bars with content get a clickable image-map area.
                if height > 0:
                    image_map.append('<area shape="rect" coords="%(coords)s" href="%(url)s" alt="%(alt)s">' % {
                        "url":    '/%s/%s/#show-selection' % (seconds_to_timestamp(start), seconds_to_timestamp(end)),
                        "alt":    '%d lines between %s and %s' % (count, seconds_to_timestamp(start), seconds_to_timestamp(end)),
                        "coords": '%s,%s,%s,%s' % (top_left_x, top_left_y, bottom_right_x, bottom_right_y),
                    })

            # Output the basic graph image
            draw_commands.append(output_path)
            subprocess.call(draw_commands)

            # Iterate over the key scenes adding them to the graph and image map
            for i, key_scene in enumerate(act.key_scenes()):
                print '     - %s' % key_scene.title

                top_left_x =     int((self.graph_background_width / float(act_duration)) * (key_scene.start - act.start)) + 2
                top_left_y =     self.max_bar_height + 5 + 14
                bottom_right_x = top_left_x + 20
                bottom_right_y = top_left_y + 20
                marker_image =   self.key_scene_marker_files % (i+1)

                # Composite the numbered marker onto the graph in place.
                subprocess.call([
                    'composite',
                    '-geometry', '+%s+%s' % (top_left_x, top_left_y),
                    marker_image,
                    output_path,
                    output_path,
                ])

                image_map.append('<area shape="rect" coords="%(coords)s" href="%(url)s" alt="%(alt)s">' % {
                    "url":      '/%s/%s/#show-selection' % (seconds_to_timestamp(key_scene.start), seconds_to_timestamp(key_scene.end)),
                    "alt":      key_scene.title.decode('utf-8'),
                    "coords":   '%s,%s,%s,%s' % (top_left_x, top_left_y, bottom_right_x, bottom_right_y),
                })

            # Finalise the image map
            image_map.append('</map>')

            self.redis_conn.hmset(
                'act:%s:%s:stats' % (mission.name, act.number),
                {
                    "image_map":    "\n".join(image_map),
                    "image_map_id": image_map_id,
                }
            )
コード例 #6
0
ファイル: stats_porn.py プロジェクト: niksbiks/Spacelog
    def build_mission(self, mission):
        """
        Build the line-frequency bar graph and HTML image map for every
        act of *mission*.

        Draws onto a transparent canvas of self.width x self.height with
        ImageMagick (`convert`, then `composite` for key-scene markers),
        writes the PNG into image_output_path, and stores the image-map
        markup in Redis under act:<mission>:<act>:stats.
        """
        print "Building data visualisations for %s..." % mission.name
        for act in list(Act.Query(self.redis_conn, mission.name)):
            print ' ... %s' % act.title

            # Split the act into sections, one for each bar on the graph
            act_duration = act.end - act.start
            # NOTE(review): an act shorter than 92 seconds gives
            # section_duration == 0, and the while loop below would then
            # never advance t -- confirm acts are always long enough.
            section_duration = act_duration // 92

            # Count the number of log lines in each segment
            # and find the maximum number of log lines in a segment
            t = act.start
            segment_line_counts = []
            max_line_count = 0
            real_output_path = self.image_output_path % mission.name
            while t < act.end:
                # Load log lines for this segment
                query = LogLine.Query(self.redis_conn,
                                      mission.name).transcript(
                                          mission.main_transcript).range(
                                              t, t + section_duration)
                line_count = len(list(query))
                # Store segment stats
                max_line_count = max(line_count, max_line_count)
                segment_line_counts.append(
                    (t, t + section_duration, line_count))
                t += section_duration

            # Make sure we have an output directory and work out where to
            # write the image (best-effort: OSError is ignored, e.g. when
            # the directory already exists)
            try:
                os.makedirs(real_output_path)
            except OSError:
                pass
            graph_file = 'graph_%s_%s.png' % (mission.name, act.number)
            output_path = '%s/%s' % (real_output_path, graph_file)

            # Add initial draw command: transparent canvas plus the two
            # triangular act end markers, then switch fill to bar colour.
            draw_commands = [
                'convert',
                '-size',
                '%dx%d' % (self.width, self.height),
                'xc:transparent',
                '-fill',
                self.end_marker_colour,
                '-draw',
                "path 'M 1,1  L 10,1  L 5,8  L 1,1",
                '-draw',
                "path 'M 890,1  L 900,1  L 895,8  L 890,1",
                '-fill',
                self.graph_bar_colour,
            ]

            # Add initial image map tags
            image_map_id = '%s_%s_frequency_graph' % (mission.name, act.number)
            image_map = [
                '<map id="%s" name="%s">' % (image_map_id, image_map_id)
            ]

            # Iterate over the segments and add them to the draw commands and image map
            for i, line in enumerate(segment_line_counts):
                start, end, count = line
                # Bar height scaled to the busiest segment; max(..., 1)
                # avoids division by zero when every segment is empty.
                height = int(
                    round(count / float(max(max_line_count, 1)) *
                          self.max_bar_height))

                bar_width = 6
                bar_spacing = 4

                top_left_x = i * (bar_width + bar_spacing) + 2
                top_left_y = self.max_bar_height - height + 14
                bottom_right_x = top_left_x + bar_width
                bottom_right_y = self.max_bar_height + 14

                draw_commands.append('-draw')
                draw_commands.append(
                    'rectangle %s,%s,%s,%s' %
                    (top_left_x, top_left_y, bottom_right_x, bottom_right_y))

                # Only bars with content get a clickable image-map area.
                if height > 0:
                    image_map.append(
                        '<area shape="rect" coords="%(coords)s" href="%(url)s" alt="%(alt)s">'
                        % {
                            "url":
                            '/%s/%s/#show-selection' %
                            (seconds_to_timestamp(start),
                             seconds_to_timestamp(end)),
                            "alt":
                            '%d lines between %s and %s' %
                            (count, seconds_to_timestamp(start),
                             seconds_to_timestamp(end)),
                            "coords":
                            '%s,%s,%s,%s' % (top_left_x, top_left_y,
                                             bottom_right_x, bottom_right_y),
                        })

            # Output the basic graph image
            draw_commands.append(output_path)
            subprocess.call(draw_commands)

            # Iterate over the key scenes adding them to the graph and image map
            for i, key_scene in enumerate(act.key_scenes()):
                print '     - %s' % key_scene.title

                top_left_x = int(
                    (self.graph_background_width / float(act_duration)) *
                    (key_scene.start - act.start)) + 2
                top_left_y = self.max_bar_height + 5 + 14
                bottom_right_x = top_left_x + 20
                bottom_right_y = top_left_y + 20
                marker_image = self.key_scene_marker_files % (i + 1)

                # Composite the numbered marker onto the graph in place.
                subprocess.call([
                    'composite',
                    '-geometry',
                    '+%s+%s' % (top_left_x, top_left_y),
                    marker_image,
                    output_path,
                    output_path,
                ])

                image_map.append(
                    '<area shape="rect" coords="%(coords)s" href="%(url)s" alt="%(alt)s">'
                    % {
                        "url":
                        '/%s/%s/#show-selection' %
                        (seconds_to_timestamp(key_scene.start),
                         seconds_to_timestamp(key_scene.end)),
                        "alt":
                        key_scene.title.decode('utf-8'),
                        "coords":
                        '%s,%s,%s,%s' % (top_left_x, top_left_y,
                                         bottom_right_x, bottom_right_y),
                    })

            # Finalise the image map
            image_map.append('</map>')

            self.redis_conn.hmset(
                'act:%s:%s:stats' % (mission.name, act.number), {
                    "image_map": "\n".join(image_map),
                    "image_map_id": image_map_id,
                })
コード例 #7
0
ファイル: parser.py プロジェクト: ariel/Spacelog
 def get_chunks(self, offset=0):
     """
     Reads the log lines from the file in order and yields them.

     Each yielded chunk is a dict with keys "timestamp" (seconds),
     "lines" (list of {"speaker", "text"} dicts), "meta" (parsed
     metadata values keyed by the _name headers), and "offset" (the
     position of the chunk's timestamp header within the file).

     offset: position to start reading from; advanced by len() of each
     raw line as it is consumed.
     """
     current_chunk = None
     reuse_line = None  # one-line pushback buffer filled by the metadata reader
     lines = iter(self.get_lines(offset))
     # An iterator object is always truthy, so this is effectively an
     # endless loop terminated by the StopIteration break below.
     while lines or reuse_line:
         # If there's a line to reuse, use that, else read a new
         # line from the file.
         if reuse_line:
             line = reuse_line
             reuse_line = None
         else:
             try:
                 line = lines.next()
             except StopIteration:
                 break
             # NOTE(review): offset is advanced by the raw line length,
             # then the line is decoded; the header's "offset" below
             # subtracts len() of the *decoded* line, which differs from
             # the byte length for non-ASCII input -- confirm transcripts
             # are ASCII-safe.
             offset += len(line)
             line = line.decode("utf8")
         # If it's a comment or empty line, ignore it.
         if not line.strip() or line.strip()[0] == "#":
             continue
         # If it's a timestamp header, make a new chunk object.
         elif line[0] == "[":
             # Read the timestamp; fall back from plain seconds to the
             # h:m:s style form (a bad value propagates ValueError).
             try:
                 timestamp = int(line[1:].split("]")[0])
             except ValueError:
                 timestamp = timestamp_to_seconds(line[1:].split("]")[0])
             if current_chunk:
                 yield current_chunk
             # Start a new log line item
             current_chunk = {
                 "timestamp": timestamp,
                 "lines": [],
                 "meta": {},
                 "offset": offset - len(line),
             }
         # If it's metadata, read the entire thing.
         elif line[0] == "_":
             # Meta item
             name, blob = line.split(":", 1)
             # Consume indented continuation lines into the blob; the
             # first non-indented line belongs to the next record, so it
             # is pushed back via reuse_line.
             while True:
                 try:
                     line = lines.next()
                 except StopIteration:
                     break
                 offset += len(line)
                 line = line.decode("utf8")
                 if not line.strip() or line.strip()[0] == "#":
                     continue
                 if line[0] in string.whitespace:
                     blob += line
                 else:
                     reuse_line = line
                     break
             # Parse the blob
             blob = blob.strip()
             if blob:
                 try:
                     data = json.loads(blob)
                 except ValueError:
                     # Fall back to treating the blob as a bare JSON string.
                     try:
                         data = json.loads('"%s"' % blob)
                     except ValueError:
                         print "Error: Invalid json at timestamp %s, key %s" % \
                                         (seconds_to_timestamp(timestamp), name)
                         continue
                 current_chunk['meta'][name.strip()] = data
         # If it's a continuation, append to the current line
         elif line[0] in string.whitespace:
             # Continuation line
             # NOTE(review): timestamp is unbound in the error paths here
             # and below if no header has been seen yet -- the error
             # print itself would raise NameError; confirm input always
             # starts with a header.
             if not current_chunk:
                 print "Error: Continuation line before first timestamp header. Line: %s" % \
                                                                     (line)
             elif not current_chunk['lines']:
                 print "Error: Continuation line before first speaker name. Timestamp %s" % \
                                                                     (seconds_to_timestamp(timestamp))
             else:
                 current_chunk['lines'][-1]['text'] += " " + line.strip()
         # If it's a new line, start a new line. Shock.
         else:
             # New line of speech
             try:
                 speaker, text = line.split(":", 1)
             except ValueError:
                 print "Error: First speaker line not in Name: Text format: %s. Timestamp %s" % \
                                                                     (line, seconds_to_timestamp(timestamp))
             else:
                 line = {
                     "speaker": speaker.strip(),
                     "text": text.strip(),
                 }
                 current_chunk['lines'].append(line)
     # Finally, if there's one last chunk, yield it.
     if current_chunk:
         yield current_chunk